diff --git a/.cvsignore b/.cvsignore deleted file mode 100644 index 6cee846679a1ada0dea8ea031252e1d854bd44f7..0000000000000000000000000000000000000000 --- a/.cvsignore +++ /dev/null @@ -1,37 +0,0 @@ -.Xrefs -.Xrefs-2.5 -aclocal.m4 -config.log -config.status -config.cache -config.guess -config.sub -configure -Makefile -autoMakefile -autoMakefile.in -.deps -tags -TAGS -lustre*.tar.gz -cscope.files -cscope.out -autom4te-2.53.cache -autom4te.cache -depcomp -compile -.*.cmd -.mergeinfo-* -Rules -missing -mkinstalldirs -install-sh -.depend -.tmp_versions -config.h -config.h.in -stamp-h1 -INSTALL -.pc -patches -series diff --git a/COPYING b/COPYING deleted file mode 100644 index c69cfd8ca427d2b966b41704e0f06cb3f7aa9a30..0000000000000000000000000000000000000000 --- a/COPYING +++ /dev/null @@ -1,352 +0,0 @@ - - NOTE! This copyright does *not* cover user programs that use kernel - services by normal system calls - this is merely considered normal use - of the kernel, and does *not* fall under the heading of "derived work". - Also note that the GPL below is copyrighted by the Free Software - Foundation, but the instance of code that it refers to (the Linux - kernel) is copyrighted by me and others who actually wrote it. - - Linus Torvalds - ----------------------------------------- - - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. 
This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. 
We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. 
- -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. 
But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) 
- -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. 
Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. 
Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. 
Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. 
It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) 19yy <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19yy name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. 
Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - <signature of Ty Coon>, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. diff --git a/ChangeLog b/ChangeLog deleted file mode 100644 index 5c2966a04faffb0b85e9e3d28a80776c2b670119..0000000000000000000000000000000000000000 --- a/ChangeLog +++ /dev/null @@ -1 +0,0 @@ -Please see lnet/ChangeLog and lustre/ChangeLog. diff --git a/README b/README deleted file mode 100644 index c052124dfba17532e0841ef885c8eec489051eca..0000000000000000000000000000000000000000 --- a/README +++ /dev/null @@ -1,2 +0,0 @@ -Instructions for building, configuring and running Lustre can be found at: - http://projects.clusterfs.com/lustre/LustreHowto. diff --git a/README.lustrecvs b/README.lustrecvs deleted file mode 100644 index f17987c0e71948a1c2108adb540c1e02a1d9444a..0000000000000000000000000000000000000000 --- a/README.lustrecvs +++ /dev/null @@ -1,5 +0,0 @@ -To check out Lustre: - -./lustrecvs <branch> - -For a list of branches, please see the branch table in lustrecvs. diff --git a/autoMakefile.am b/autoMakefile.am deleted file mode 100644 index f31200a591e6038dc816d0b4a0836220ac41ca90..0000000000000000000000000000000000000000 --- a/autoMakefile.am +++ /dev/null @@ -1,25 +0,0 @@ -include build/autoMakefile.am.toplevel - -CSTK=/tmp/checkstack -CSTKO=/tmp/checkstack.orig - -checkstack: - [ -f ${CSTK} -a ! 
-s ${CSTKO} ] && mv -f ${CSTK} ${CSTKO} || true - { for i in lustre/* lnet/* ; do \ - MOD=$$i/`basename $$i`; \ - if [ $$i = "lustre/llite" ]; then MOD=$$i/lustre; fi; \ - [ -f $$MOD.ko ] && MOD=$$MOD.ko || MOD=$$MOD.o; \ - [ -f $$MOD ] && objdump -d $$MOD | perl build/checkstack.pl; \ - done; \ - for i in lnet/klnds/*; do \ - MOD=$$i/k`basename $$i`; \ - [ -f $$MOD.ko ] && MOD=$$MOD.ko || MOD=$$MOD.o; \ - [ -f $$MOD ] && objdump -d $$MOD | perl build/checkstack.pl; \ - done } | sort -nr > ${CSTK} - [ -f ${CSTKO} ] && ! diff -u ${CSTKO} ${CSTK} || head -30 ${CSTK} - -checkstack-update: - [ -f ${CSTK} ] && mv -f ${CSTK} ${CSTKO} - -checkstack-clean: - rm -f ${CSTK} ${CSTKO} diff --git a/autogen.sh b/autogen.sh deleted file mode 100644 index a32c3f8bc95d3d3e890bf7d4ae75d47140cb49b6..0000000000000000000000000000000000000000 --- a/autogen.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -exec bash build/autogen.sh $@ diff --git a/build/.cvsignore b/build/.cvsignore deleted file mode 100644 index 023dff9926883bb09f7605e5503f73be80e3ccb4..0000000000000000000000000000000000000000 --- a/build/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.*.cmd -.*.flags -.*.d -*.ko -*.c -.*.o.d -.tmp_versions -Rules -autoMakefile.in -autoMakefile -lustre.spec diff --git a/build/Makefile b/build/Makefile deleted file mode 100644 index d13bbb70adc7cae05ece1be8d59727d20dc030c5..0000000000000000000000000000000000000000 --- a/build/Makefile +++ /dev/null @@ -1,49 +0,0 @@ -# -# There are three ways this Makefile can be called: -# -# -# 1. As a subdirectory from the toplevel, for automake -# -# 2. A wrapper around the kernel's makefile when building modules, to -# possibly override the .config file -# -# 3. 
At configure time, as the toplevel module dir for building -# kernel tests -# - -ifeq ($(PATCHLEVEL),) - -ifeq ($(LUSTRE_LINUX_CONFIG),) - -# case #1 -include autoMakefile - -else - -# case #2 -# Note that this comes from make -C $LINUX -f $LUSTRE/build/Makefile -# So the include Makefile includes $LINUX/Makefile, not this file -include $(LUSTRE_LINUX_CONFIG) -include Makefile - -endif # LUSTRE_LINUX_CONFIG - -else # PATCHLEVEL - -# case 3 - -ifneq ($(LUSTRE_KERNEL_TEST),) -ifeq ($(PATCHLEVEL),4) -all: $(LUSTRE_KERNEL_TEST) -else -extra-y = $(LUSTRE_KERNEL_TEST) -endif -endif - -obj-m := conftest.o - -ifeq ($(PATCHLEVEL),4) -include $(TOPDIR)/Rules.make -endif - -endif # PATCHLEVEL diff --git a/build/Makefile.in.toplevel b/build/Makefile.in.toplevel deleted file mode 100644 index 3ae031de33e8b0d6911f1c0a316308c1055be9d9..0000000000000000000000000000000000000000 --- a/build/Makefile.in.toplevel +++ /dev/null @@ -1,4 +0,0 @@ -subdir-m += lnet -subdir-m += lustre - -@INCLUDE_RULES@ diff --git a/build/autoMakefile.am b/build/autoMakefile.am deleted file mode 100644 index 2b1e1cb2d50d7974e55dfeefc8a5e4000a06a18a..0000000000000000000000000000000000000000 --- a/build/autoMakefile.am +++ /dev/null @@ -1,19 +0,0 @@ -EXTRA_DIST := Makefile Makefile.in.toplevel \ - autoMakefile.am.toplevel lbuild linux-merge-config.awk \ - linux-merge-modules.awk linux-rhconfig.h lmake \ - lustre-kernel-2.4.spec.in lustre.spec lustre.spec \ - suse-functions.sh suse-post.sh suse-postun.sh \ - suse-trigger-script.sh.in README.kernel-source \ - sles8-post.sh sles8-postun.sh sles8-pre.sh \ - sles8-update_INITRD_MODULES.sh \ - sles8-update_rcfile_setting.sh \ - update_oldconfig - -CONFIG_CLEAN_FILES := lustre.spec - -MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ - -distclean: distclean-recursive - @true - -SUBDIRS := autoconf diff --git a/build/autoMakefile.am.toplevel b/build/autoMakefile.am.toplevel deleted file mode 100644 index 
bcdd810a5d1b37e57b0930bfe3a4e3d9e943037b..0000000000000000000000000000000000000000 --- a/build/autoMakefile.am.toplevel +++ /dev/null @@ -1,72 +0,0 @@ -AUTOMAKE_OPTIONS = foreign - -SUBDIRS := . build @LIBSYSIO_SUBDIR@ @SNMP_SUBDIR@ lnet lustre -DIST_SUBDIRS := build @SNMP_DIST_SUBDIR@ libsysio lnet lustre - -EXTRA_DIST := config.h.in - -# these empty rules are needed so that automake doesn't add its own -# recursive rules -etags-recursive: - -ctags-recursive: - -tags-recursive: - -TAGS: - -tags: - rm -f $(top_srcdir)/TAGS - ETAGSF=`etags --version | grep -iq exuberant && \ - echo "-I __initdata,__exitdata,EXPORT_SYMBOL"`; \ - find $(top_srcdir) -name '*.[hc]' |grep -v linux-stage |xargs etags $$ETAGSF -a - - rm -f $(top_srcdir)/tags - CTAGSF=`ctags --version | grep -iq exuberant && \ - echo "-I __initdata,__exitdata,EXPORT_SYMBOL"`; \ - find $(top_srcdir) -name '*.[hc]' |grep -v linux-stage |xargs ctags $$CTAGSF -a - -if MODULES -all-sources: - $(MAKE) sources -C lnet - $(MAKE) sources -C lustre - -if LINUX -all-am: modules - -if !LINUX25 -DEP = dep -dep: .depend - -.depend: all-sources - $(MAKE) $(ARCH_UM) CC="$(CC)" -C $(LINUX_OBJ) \ - -f $(PWD)/build/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) \ - -o scripts -o include/config/MARKER _sfdep_$(PWD) \ - _FASTDEP_ALL_SUB_DIRS="$(PWD)" - -CLEANFILES = .depend -endif # !LINUX25 - -modules: $(DEP) all-sources - $(MAKE) $(ARCH_UM) CC="$(CC)" -C $(LINUX_OBJ) \ - -f $(PWD)/build/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) \ - $(MODULE_TARGET)=$(PWD) -o tmp_include_depends -o scripts -o \ - include/config/MARKER $@ -endif # LINUX - -endif # MODULES - -dist-hook: - find $(distdir) -name .deps -o \ - -name CVS -o \ - -name .svn -o \ - -name .#* | xargs rm -rf - -build/lustre.spec: build/lustre.spec.in config.status - ./config.status build/lustre.spec - -rpms: build/lustre.spec dist Makefile - rpmbuild -ta $(distdir).tar.gz - -srpm: build/lustre.spec dist Makefile - rpmbuild -ts $(distdir).tar.gz diff --git 
a/build/autoconf/lustre-build-linux.m4 b/build/autoconf/lustre-build-linux.m4 deleted file mode 100644 index 84a1272137cce4787a3679276c6960451ab25b5b..0000000000000000000000000000000000000000 --- a/build/autoconf/lustre-build-linux.m4 +++ /dev/null @@ -1,411 +0,0 @@ -# -# LB_LINUX_VERSION -# -# Set things accordingly for a 2.5 kernel -# -AC_DEFUN([LB_LINUX_VERSION], -[LB_CHECK_FILE([$LINUX/include/linux/namei.h], - [ - linux25="yes" - KMODEXT=".ko" - ],[ - KMODEXT=".o" - linux25="no" - ]) -AC_MSG_CHECKING([if you are using Linux 2.6]) -AC_MSG_RESULT([$linux25]) - -MODULE_TARGET="SUBDIRS" -if test $linux25 = "yes" ; then - makerule="$PWD/build" - AC_MSG_CHECKING([for external module build support]) - rm -f build/conftest.i - LB_LINUX_TRY_MAKE([],[], - [$makerule LUSTRE_KERNEL_TEST=conftest.i], - [test -s build/conftest.i], - [ - AC_MSG_RESULT([no]) - ],[ - makerule="_module_$makerule" - MODULE_TARGET="M" - LB_LINUX_TRY_MAKE([],[], - [$makerule LUSTRE_KERNEL_TEST=conftest.i], - [test -s build/conftest.i], - [ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_ERROR([unknown; check config.log for details]) - ]) - ]) -else - makerule="_dir_$PWD/build" -fi - -AC_SUBST(MODULE_TARGET) -AC_SUBST(linux25) -AC_SUBST(KMODEXT) -]) - -# -# LB_LINUX_RELEASE -# -# get the release version of linux -# -AC_DEFUN([LB_LINUX_RELEASE], -[LINUXRELEASE= -rm -f build/conftest.i -AC_MSG_CHECKING([for Linux release]) -if test -s $LINUX_OBJ/include/linux/utsrelease.h ; then - LINUXRELEASEHEADER=utsrelease.h -else - LINUXRELEASEHEADER=version.h -fi -LB_LINUX_TRY_MAKE([ - #include <linux/$LINUXRELEASEHEADER> -],[ - char *LINUXRELEASE; - LINUXRELEASE=UTS_RELEASE; -],[ - $makerule LUSTRE_KERNEL_TEST=conftest.i -],[ - test -s build/conftest.i -],[ - # LINUXRELEASE="UTS_RELEASE" - eval $(grep "LINUXRELEASE=" build/conftest.i) -],[ - AC_MSG_RESULT([unknown]) - AC_MSG_ERROR([Could not preprocess test program. 
Consult config.log for details.]) -]) -rm -f build/conftest.i -if test x$LINUXRELEASE = x ; then - AC_MSG_RESULT([unknown]) - AC_MSG_ERROR([Could not determine Linux release version from linux/version.h.]) -fi -AC_MSG_RESULT([$LINUXRELEASE]) -AC_SUBST(LINUXRELEASE) - -moduledir='/lib/modules/'$LINUXRELEASE/kernel -AC_SUBST(moduledir) - -modulefsdir='$(moduledir)/fs/$(PACKAGE)' -AC_SUBST(modulefsdir) - -modulenetdir='$(moduledir)/net/$(PACKAGE)' -AC_SUBST(modulenetdir) - -# ------------ RELEASE -------------------------------- -AC_MSG_CHECKING([for Lustre release]) -RELEASE="`echo ${LINUXRELEASE} | tr '-' '_'`_`date +%Y%m%d%H%M`" -AC_MSG_RESULT($RELEASE) -AC_SUBST(RELEASE) -]) - -# -# LB_LINUX_PATH -# -# Find paths for linux, handling kernel-source rpms -# -AC_DEFUN([LB_LINUX_PATH], -[AC_MSG_CHECKING([for Linux sources]) -AC_ARG_WITH([linux], - AC_HELP_STRING([--with-linux=path], - [set path to Linux source (default=/usr/src/linux)]), - [LINUX=$with_linux], - [LINUX=/usr/src/linux]) -AC_MSG_RESULT([$LINUX]) -AC_SUBST(LINUX) - -# -------- check for linux -------- -LB_CHECK_FILE([$LINUX],[], - [AC_MSG_ERROR([Kernel source $LINUX could not be found.])]) - -# -------- linux objects (for 2.6) -- -AC_MSG_CHECKING([for Linux objects dir]) -AC_ARG_WITH([linux-obj], - AC_HELP_STRING([--with-linux-obj=path], - [set path to Linux objects dir (default=$LINUX)]), - [LINUX_OBJ=$with_linux_obj], - [LINUX_OBJ=$LINUX]) -AC_MSG_RESULT([$LINUX_OBJ]) -AC_SUBST(LINUX_OBJ) - -# -------- check for .config -------- -AC_ARG_WITH([linux-config], - [AC_HELP_STRING([--with-linux-config=path], - [set path to Linux .conf (default=$LINUX_OBJ/.config)])], - [LINUX_CONFIG=$with_linux_config], - [LINUX_CONFIG=$LINUX_OBJ/.config]) -AC_SUBST(LINUX_CONFIG) - -LB_CHECK_FILE([/boot/kernel.h], - [KERNEL_SOURCE_HEADER='/boot/kernel.h'], - [LB_CHECK_FILE([/var/adm/running-kernel.h], - [KERNEL_SOURCE_HEADER='/var/adm/running-kernel.h'])]) - -AC_ARG_WITH([kernel-source-header], - 
AC_HELP_STRING([--with-kernel-source-header=path], - [Use a different kernel version header. Consult build/README.kernel-source for details.]), - [KERNEL_SOURCE_HEADER=$with_kernel_source_header]) - -# ------------ .config exists ---------------- -LB_CHECK_FILE([$LINUX_CONFIG],[], - [AC_MSG_ERROR([Kernel config could not be found. If you are building from a kernel-source rpm consult build/README.kernel-source])]) - -# ----------- make dep run? ------------------ -# at 2.6.19 # $LINUX/include/linux/config.h is removed -# and at more old has only one line -# include <autoconf.h> -LB_CHECK_FILES([$LINUX_OBJ/include/linux/autoconf.h - $LINUX_OBJ/include/linux/version.h - ],[], - [AC_MSG_ERROR([Run make config in $LINUX.])]) -# -LB_CHECK_FILE([$LINUX_OBJ/include/linux/config.h], - [ AC_DEFINE(HAVE_KERNEL_CONFIG_H, 1, - [kernel modules need to include config.h]) - ] -) - -# ------------ rhconfig.h includes runtime-generated bits -- -# red hat kernel-source checks - -# we know this exists after the check above. if the user -# tarred up the tree and ran make dep etc. in it, then -# version.h gets overwritten with a standard linux one. - -if grep rhconfig $LINUX_OBJ/include/linux/version.h >/dev/null ; then - # This is a clean kernel-source tree, we need to - # enable extensive workarounds to get this to build - # modules - LB_CHECK_FILE([$KERNEL_SOURCE_HEADER], - [if test $KERNEL_SOURCE_HEADER = '/boot/kernel.h' ; then - AC_MSG_WARN([Using /boot/kernel.h from RUNNING kernel.]) - AC_MSG_WARN([If this is not what you want, use --with-kernel-source-header.]) - AC_MSG_WARN([Consult build/README.kernel-source for details.]) - fi], - [AC_MSG_ERROR([$KERNEL_SOURCE_HEADER not found. 
Consult build/README.kernel-source for details.])]) - EXTRA_KCFLAGS="-include $KERNEL_SOURCE_HEADER $EXTRA_KCFLAGS" -fi - -# this is needed before we can build modules -LB_LINUX_UML -LB_LINUX_VERSION - -# --- check that we can build modules at all -AC_MSG_CHECKING([that modules can be built at all]) -LB_LINUX_TRY_COMPILE([],[],[ - AC_MSG_RESULT([yes]) -],[ - AC_MSG_RESULT([no]) - AC_MSG_WARN([Consult config.log for details.]) - AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult build/README.kernel-source]) - AC_MSG_ERROR([Kernel modules cannot be build.]) -]) - -LB_LINUX_RELEASE -]) - -# -# LB_LINUX_UML -# -# check for a uml kernel -# -AC_DEFUN([LB_LINUX_UML], -[ARCH_UM= -UML_CFLAGS= - -AC_MSG_CHECKING([if you are running user mode linux for $target_cpu]) -if test -e $LINUX/include/asm-um ; then - if test X`ls -id $LINUX/include/asm/ 2>/dev/null | awk '{print [$]1}'` = X`ls -id $LINUX/include/asm-um 2>/dev/null | awk '{print [$]1}'` ; then - ARCH_UM='ARCH=um' - # see notes in Rules.in - UML_CFLAGS='-O0' - AC_MSG_RESULT(yes) - else - AC_MSG_RESULT([no (asm doesn't point at asm-um)]) - fi -else - AC_MSG_RESULT([no (asm-um missing)]) -fi -AC_SUBST(ARCH_UM) -AC_SUBST(UML_CFLAGS) -]) - -# these are like AC_TRY_COMPILE, but try to build modules against the -# kernel, inside the build directory - -# -# LB_LINUX_CONFTEST -# -# create a conftest.c file -# -AC_DEFUN([LB_LINUX_CONFTEST], -[cat >conftest.c <<_ACEOF -$1 -_ACEOF -]) - - -# LB_LANG_PROGRAM(C)([PROLOGUE], [BODY]) -# -------------------------------------- -m4_define([LB_LANG_PROGRAM], -[$1 -int -main (void) -{ -dnl Do *not* indent the following line: there may be CPP directives. -dnl Don't move the `;' right after for the same reason. 
-$2 - ; - return 0; -}]) - -# -# LB_LINUX_COMPILE_IFELSE -# -# like AC_COMPILE_IFELSE -# -AC_DEFUN([LB_LINUX_COMPILE_IFELSE], -[m4_ifvaln([$1], [LB_LINUX_CONFTEST([$1])])dnl -rm -f build/conftest.o build/conftest.mod.c build/conftest.ko -AS_IF([AC_TRY_COMMAND(cp conftest.c build && make [$2] CC="$CC" -f $PWD/build/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM $MODULE_TARGET=$PWD/build) >/dev/null && AC_TRY_COMMAND([$3])], - [$4], - [_AC_MSG_LOG_CONFTEST -m4_ifvaln([$5],[$5])dnl])dnl -rm -f build/conftest.o build/conftest.mod.c build/conftest.mod.o build/conftest.ko m4_ifval([$1], [build/conftest.c conftest.c])[]dnl -]) - -# -# LB_LINUX_TRY_COMPILE -# -# like AC_TRY_COMPILE -# -AC_DEFUN([LB_LINUX_TRY_COMPILE], -[LB_LINUX_COMPILE_IFELSE( - [AC_LANG_SOURCE([LB_LANG_PROGRAM([[$1]], [[$2]])])], - [modules], - [test -s build/conftest.o], - [$3], [$4])]) - -# -# LB_LINUX_CONFIG -# -# check if a given config option is defined -# -AC_DEFUN([LB_LINUX_CONFIG], -[AC_MSG_CHECKING([if Linux was built with CONFIG_$1]) -LB_LINUX_TRY_COMPILE([ -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -],[ -#ifndef CONFIG_$1 -#error CONFIG_$1 not #defined -#endif -],[ -AC_MSG_RESULT([yes]) -$2 -],[ -AC_MSG_RESULT([no]) -$3 -]) -]) - -# -# LB_LINUX_CONFIG_IM -# -# check if a given config option is builtin or as module -# -AC_DEFUN([LB_LINUX_CONFIG_IM], -[AC_MSG_CHECKING([if Linux was built with CONFIG_$1 in or as module]) -LB_LINUX_TRY_COMPILE([#include <linux/config.h>],[ -#if !(defined(CONFIG_$1) || defined(CONFIG_$1_MODULE)) -#error CONFIG_$1 and CONFIG_$1_MODULE not #defined -#endif -],[ -AC_MSG_RESULT([yes]) -$2 -],[ -AC_MSG_RESULT([no]) -$3 -]) -]) - -# -# LB_LINUX_TRY_MAKE -# -# like LB_LINUX_TRY_COMPILE, but with different arguments -# -AC_DEFUN([LB_LINUX_TRY_MAKE], 
-[LB_LINUX_COMPILE_IFELSE([AC_LANG_SOURCE([LB_LANG_PROGRAM([[$1]], [[$2]])])], [$3], [$4], [$5], [$6])]) - -# -# LB_LINUX_CONFIG_BIG_STACK -# -# check for big stack patch -# -AC_DEFUN([LB_LINUX_CONFIG_BIG_STACK], -[if test "x$ARCH_UM" = "x" -a "x$linux25" = "xno" ; then - case $target_cpu in - i?86 | x86_64) - LB_LINUX_CONFIG([STACK_SIZE_16KB],[],[ - LB_LINUX_CONFIG([STACK_SIZE_32KB],[],[ - LB_LINUX_CONFIG([STACK_SIZE_64KB],[],[ - AC_MSG_ERROR([Lustre requires that Linux is configured with at least a 16KB stack.]) - ]) - ]) - ]) - ;; - esac -fi -]) - -# -# LB_PROG_LINUX -# -# linux tests -# -AC_DEFUN([LB_PROG_LINUX], -[LB_LINUX_PATH - -LB_LINUX_CONFIG([MODULES],[],[ - AC_MSG_ERROR([module support is required to build Lustre kernel modules.]) -]) - -LB_LINUX_CONFIG([MODVERSIONS]) - -LB_LINUX_CONFIG([PREEMPT],[ - AC_MSG_ERROR([Lustre does not support kernels with preempt enabled.]) -]) - -LB_LINUX_CONFIG([KALLSYMS],[],[ -if test "x$ARCH_UM" = "x" ; then - AC_MSG_ERROR([Lustre requires that CONFIG_KALLSYMS is enabled in your kernel.]) -fi -]) - -LB_LINUX_CONFIG([KMOD],[],[ - AC_MSG_WARN([]) - AC_MSG_WARN([Kernel module loading support is highly recommended.]) - AC_MSG_WARN([]) -]) - -#LB_LINUX_CONFIG_BIG_STACK - -# LNet tests -LN_PROG_LINUX - -# Lustre tests -LC_PROG_LINUX - -]) - -# -# LB_LINUX_CONDITIONALS -# -# AM_CONDITIONALS for linux -# -AC_DEFUN([LB_LINUX_CONDITIONALS], -[AM_CONDITIONAL(LINUX25, test x$linux25 = xyes) -]) - diff --git a/build/autoconf/lustre-build.m4 b/build/autoconf/lustre-build.m4 deleted file mode 100644 index 76970fdeaf8819584596a61a68583f09ee17972e..0000000000000000000000000000000000000000 --- a/build/autoconf/lustre-build.m4 +++ /dev/null @@ -1,576 +0,0 @@ -# -# LB_CHECK_VERSION -# -# Verify that LUSTRE_VERSION was defined properly -# -AC_DEFUN([LB_CHECK_VERSION], -[if test "LUSTRE_VERSION" = "LUSTRE""_VERSION" ; then - AC_MSG_ERROR([This script was not built with a version number.]) -fi -]) - -# -# LB_CANONICAL_SYSTEM -# -# fixup 
$target_os for use in other places -# -AC_DEFUN([LB_CANONICAL_SYSTEM], -[case $target_os in - linux*) - lb_target_os="linux" - ;; - darwin*) - lb_target_os="darwin" - ;; - solaris2.11*) - lb_target_os="SunOS" - ;; -esac -AC_SUBST(lb_target_os) -]) - -# -# LB_CHECK_FILE -# -# Check for file existance even when cross compiling -# -AC_DEFUN([LB_CHECK_FILE], -[AS_VAR_PUSHDEF([lb_File], [lb_cv_file_$1])dnl -AC_CACHE_CHECK([for $1], lb_File, -[if test -r "$1"; then - AS_VAR_SET(lb_File, yes) -else - AS_VAR_SET(lb_File, no) -fi]) -AS_IF([test AS_VAR_GET(lb_File) = yes], [$2], [$3])[]dnl -AS_VAR_POPDEF([lb_File])dnl -])# LB_CHECK_FILE - -# -# LB_CHECK_FILES -# -# LB_CHECK_FILE over multiple files -# -AC_DEFUN([LB_CHECK_FILES], -[AC_FOREACH([AC_FILE_NAME], [$1], - [LB_CHECK_FILE(AC_FILE_NAME, - [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_[]AC_FILE_NAME), 1, - [Define to 1 if you have the - file `]AC_File['.]) -$2], - [$3])])]) - -# -# LB_ARG_LIBS_INCLUDES -# -# support for --with-foo, --with-foo-includes, and --with-foo-libs in -# a single magical macro -# -AC_DEFUN([LB_ARG_LIBS_INCLUDES], -[lb_pathvar="m4_bpatsubst([$2], -, _)" -AC_MSG_CHECKING([for $1]) -AC_ARG_WITH([$2], - AC_HELP_STRING([--with-$2=path], - [path to $1]), - [],[withval=$4]) - -if test x$withval = xyes ; then - eval "$lb_pathvar='$3'" -else - eval "$lb_pathvar='$withval'" -fi -AC_MSG_RESULT([${!lb_pathvar:-no}]) - -if test x${!lb_pathvar} != x -a x${!lb_pathvar} != xno ; then - AC_MSG_CHECKING([for $1 includes]) - AC_ARG_WITH([$2-includes], - AC_HELP_STRING([--with-$2-includes=path], - [path to $1 includes]), - [],[withval='yes']) - - lb_includevar="${lb_pathvar}_includes" - if test x$withval = xyes ; then - eval "${lb_includevar}='${!lb_pathvar}/include'" - else - eval "${lb_includevar}='$withval'" - fi - AC_MSG_RESULT([${!lb_includevar}]) - - AC_MSG_CHECKING([for $1 libs]) - AC_ARG_WITH([$2-libs], - AC_HELP_STRING([--with-$2-libs=path], - [path to $1 libs]), - [],[withval='yes']) - - 
lb_libvar="${lb_pathvar}_libs" - if test x$withval = xyes ; then - eval "${lb_libvar}='${!lb_pathvar}/lib'" - else - eval "${lb_libvar}='$withval'" - fi - AC_MSG_RESULT([${!lb_libvar}]) -fi -]) -]) - -# -# LB_PATH_LIBSYSIO -# -# Handle internal/external libsysio -# -AC_DEFUN([LB_PATH_LIBSYSIO], -[AC_ARG_WITH([sysio], - AC_HELP_STRING([--with-sysio=path], - [set path to libsysio source (default is included libsysio)]), - [],[ - case $lb_target_os in - linux) - with_sysio='yes' - ;; - *) - with_sysio='no' - ;; - esac - ]) -AC_MSG_CHECKING([location of libsysio]) -enable_sysio="$with_sysio" -case x$with_sysio in - xyes) - AC_MSG_RESULT([internal]) - LB_CHECK_FILE([$srcdir/libsysio/src/rmdir.c],[],[ - AC_MSG_ERROR([A complete internal libsysio was not found.]) - ]) - LIBSYSIO_SUBDIR="libsysio" - SYSIO="$PWD/libsysio" - ;; - xno) - AC_MSG_RESULT([disabled]) - ;; - *) - AC_MSG_RESULT([$with_sysio]) - LB_CHECK_FILE([$with_sysio/lib/libsysio.a],[],[ - AC_MSG_ERROR([A complete (built) external libsysio was not found.]) - ]) - SYSIO=$with_sysio - with_sysio="yes" - ;; -esac - -# We have to configure even if we don't build here for make dist to -# work -AC_CONFIG_SUBDIRS(libsysio) -]) - -# -# LB_CONFIG_CRAY_XT3 -# -# Enable Cray XT3 features -# -AC_DEFUN([LB_CONFIG_CRAY_XT3], -[AC_MSG_CHECKING([whether to build Cray XT3 features]) -AC_ARG_ENABLE([cray_xt3], - AC_HELP_STRING([--enable-cray-xt3], - [enable building of Cray XT3 features]), - [enable_cray_xt3='yes'],[enable_cray_xt3='no']) -AC_MSG_RESULT([$enable_cray_xt3]) -if test x$enable_cray_xt3 != xno; then - AC_DEFINE(CRAY_XT3, 1, Enable Cray XT3 Features) -fi -]) - -# -# LB_CONFIG_BGL -# -# Enable BGL features -# -AC_DEFUN([LB_CONFIG_BGL], -[AC_MSG_CHECKING([whether to build BGL features]) -AC_ARG_ENABLE([bgl], - AC_HELP_STRING([--enable-bgl], - [enable building of BGL features]), - [enable_bgl='yes'],[enable_bgl='no']) -AC_MSG_RESULT([$enable_bgl]) -if test x$enable_bgl != xno; then - AC_DEFINE(BGL_SUPPORT, 1, Enable BGL 
Features) - enable_doc='no' - enable_tests='no' - enable_server='no' - enable_liblustre='no' - enable_libreadline='no' -fi -]) - - -# -# LB_CONFIG_UOSS -# -# -AC_DEFUN([LB_CONFIG_UOSS], -[AC_MSG_CHECKING([whether to build user-level oss]) -AC_ARG_ENABLE([uoss], - AC_HELP_STRING([--enable-uoss], - [enable building of user-level oss]), - [enable_uoss='yes'],[enable_uoss='no']) -if test x$enable_uoss != xno; then - AC_DEFINE(UOSS_SUPPORT, 1, Enable user-level OSS) - AC_DEFINE(LUSTRE_ULEVEL_MT, 1, Multi-threaded user-level lustre port) - enable_modules='no' -fi -]) - -# -# LB_PATH_SNMP -# -# check for in-tree snmp support -# -AC_DEFUN([LB_PATH_SNMP], -[LB_CHECK_FILE([$srcdir/snmp/lustre-snmp.c],[SNMP_DIST_SUBDIR="snmp"]) -AC_SUBST(SNMP_DIST_SUBDIR) -AC_SUBST(SNMP_SUBDIR) -]) - -# -# LB_CONFIG_MODULES -# -# Build kernel modules? -# -AC_DEFUN([LB_CONFIG_MODULES], -[AC_MSG_CHECKING([whether to build kernel modules]) -AC_ARG_ENABLE([modules], - AC_HELP_STRING([--disable-modules], - [disable building of Lustre kernel modules]), - [],[ - LC_TARGET_SUPPORTED([ - enable_modules='yes' - ],[ - enable_modules='no' - ]) - ]) -AC_MSG_RESULT([$enable_modules ($target_os)]) - -if test x$enable_modules = xyes ; then - case $target_os in - linux*) - LB_PROG_LINUX - ;; - darwin*) - LB_PROG_DARWIN - ;; - *) - # This is strange - Lustre supports a target we don't - AC_MSG_ERROR([Modules are not supported on $target_os]) - ;; - esac -fi -]) - -# -# LB_CONFIG_UTILS -# -# Build utils? -# -AC_DEFUN([LB_CONFIG_UTILS], -[AC_MSG_CHECKING([whether to build utilities]) -AC_ARG_ENABLE([utils], - AC_HELP_STRING([--disable-utils], - [disable building of Lustre utility programs]), - [],[enable_utils='yes']) -AC_MSG_RESULT([$enable_utils]) -if test x$enable_utils = xyes ; then - LB_CONFIG_INIT_SCRIPTS -fi -]) - -# -# LB_CONFIG_TESTS -# -# Build tests? 
-# -AC_DEFUN([LB_CONFIG_TESTS], -[AC_MSG_CHECKING([whether to build Lustre tests]) -AC_ARG_ENABLE([tests], - AC_HELP_STRING([--disable-tests], - [disable building of Lustre tests]), - [], - [ - enable_tests='yes' - ]) -AC_MSG_RESULT([$enable_tests]) -]) - -# -# LB_CONFIG_DOCS -# -# Build docs? -# -AC_DEFUN([LB_CONFIG_DOCS], -[AC_MSG_CHECKING([whether to build docs]) -AC_ARG_ENABLE(doc, - AC_HELP_STRING([--disable-doc], - [skip creation of pdf documentation]), - [ - if test x$enable_doc = xyes ; then - ENABLE_DOC=1 - else - ENABLE_DOC=0 - fi - ],[ - ENABLE_DOC=0 - enable_doc='no' - ]) -AC_MSG_RESULT([$enable_doc]) -AC_SUBST(ENABLE_DOC) -]) - -# -# LB_CONFIG_INIT_SCRIPTS -# -# our init scripts only work on red hat linux -# -AC_DEFUN([LB_CONFIG_INIT_SCRIPTS], -[ENABLE_INIT_SCRIPTS=0 -if test x$enable_utils = xyes ; then - AC_MSG_CHECKING([whether to install init scripts]) - # our scripts only work on red hat systems - if test -f /etc/init.d/functions -a -f /etc/sysconfig/network ; then - ENABLE_INIT_SCRIPTS=1 - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -fi -AC_SUBST(ENABLE_INIT_SCRIPTS) -]) - -# -# LB_CONFIG_HEADERS -# -# add -include config.h -# -AC_DEFUN([LB_CONFIG_HEADERS], -[AC_CONFIG_HEADERS([config.h]) -CPPFLAGS="-include \$(top_builddir)/config.h $CPPFLAGS" -EXTRA_KCFLAGS="-include $PWD/config.h $EXTRA_KCFLAGS" -AC_SUBST(EXTRA_KCFLAGS) -]) - -# -# LB_INCLUDE_RULES -# -# defines for including the toplevel Rules -# -AC_DEFUN([LB_INCLUDE_RULES], -[INCLUDE_RULES="include $PWD/build/Rules" -AC_SUBST(INCLUDE_RULES) -]) - -# -# LB_PATH_DEFAULTS -# -# 'fixup' default paths -# -AC_DEFUN([LB_PATH_DEFAULTS], -[# directories for binaries -AC_PREFIX_DEFAULT([/usr]) - -sysconfdir='/etc' -AC_SUBST(sysconfdir) - -# Directories for documentation and demos. 
-docdir='${datadir}/doc/$(PACKAGE)' -AC_SUBST(docdir) - -LN_PATH_DEFAULTS -LC_PATH_DEFAULTS - -]) - -# -# LB_PROG_CC -# -# checks on the C compiler -# -AC_DEFUN([LB_PROG_CC], -[AC_PROG_RANLIB -AC_MSG_CHECKING([for buggy compiler]) -CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"` -bad_cc() { - AC_MSG_RESULT([buggy compiler found!]) - echo - echo " '$CC_VERSION'" - echo " has been known to generate bad code, " - echo " please get an updated compiler." - AC_MSG_ERROR([sorry]) -} -case "$CC_VERSION" in - "gcc version 2.95"*) - bad_cc - ;; - # ost_pack_niobuf putting 64bit NTOH temporaries on the stack - # without "sub $0xc,%esp" to protect the stack from being - # stomped on by interrupts (bug 606) - "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)") - bad_cc - ;; - # mandrake's similar sub 0xc compiler bug - # http://marc.theaimsgroup.com/?l=linux-kernel&m=104748366226348&w=2 - "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)") - bad_cc - ;; - *) - AC_MSG_RESULT([no known problems]) - ;; -esac - -# --------- unsigned long long sane? ------- -AC_CHECK_SIZEOF(unsigned long long, 0) -echo "---> size SIZEOF $SIZEOF_unsigned_long_long" -echo "---> size SIZEOF $ac_cv_sizeof_unsigned_long_long" -if test $ac_cv_sizeof_unsigned_long_long != 8 ; then - AC_MSG_ERROR([** we assume that sizeof(long long) == 8. 
Tell phil@clusterfs.com]) -fi - -# FIXME -AC_CHECK_DECL([__i386__], [], [ - -if test x$enable_bgl != xyes; then -AC_MSG_CHECKING([if $CC accepts -m64]) -CC_save="$CC" -CC="$CC -m64" -AC_TRY_COMPILE([],[],[ - AC_MSG_RESULT([yes]) -],[ - AC_MSG_RESULT([no]) - CC="$CC_save" -]) -fi - -]) - -CPPFLAGS="-I\$(top_builddir)/lnet/include -I\$(top_srcdir)/lnet/include -I\$(top_builddir)/lustre/include -I\$(top_srcdir)/lustre/include $CPPFLAGS" - -LLCPPFLAGS="-D__arch_lib__ -D_LARGEFILE64_SOURCE=1" -AC_SUBST(LLCPPFLAGS) - -LLCFLAGS="-g -Wall -fPIC" -AC_SUBST(LLCFLAGS) - -# everyone builds against lnet and lustre -EXTRA_KCFLAGS="$EXTRA_KCFLAGS -g -I$PWD/lnet/include -I$PWD/lustre/include" -AC_SUBST(EXTRA_KCFLAGS) -]) - -# -# LB_CONTITIONALS -# -# AM_CONDITIONAL instances for everything -# (so that portals/lustre can disable some if needed) -AC_DEFUN([LB_CONDITIONALS], -[AM_CONDITIONAL(MODULES, test x$enable_modules = xyes) -AM_CONDITIONAL(UTILS, test x$enable_utils = xyes) -AM_CONDITIONAL(TESTS, test x$enable_tests = xyes) -AM_CONDITIONAL(DOC, test x$ENABLE_DOC = x1) -AM_CONDITIONAL(INIT_SCRIPTS, test x$ENABLE_INIT_SCRIPTS = "x1") -AM_CONDITIONAL(LINUX, test x$lb_target_os = "xlinux") -AM_CONDITIONAL(DARWIN, test x$lb_target_os = "xdarwin") -AM_CONDITIONAL(CRAY_XT3, test x$enable_cray_xt3 = "xyes") -AM_CONDITIONAL(SUNOS, test x$lb_target_os = "xSunOS") - -# this lets lustre cancel libsysio, per-branch or if liblustre is -# disabled -if test "x$LIBSYSIO_SUBDIR" = xlibsysio ; then - if test "x$with_sysio" != xyes ; then - SYSIO="" - LIBSYSIO_SUBDIR="" - fi -fi -AC_SUBST(LIBSYSIO_SUBDIR) -AC_SUBST(SYSIO) - -LB_LINUX_CONDITIONALS -LB_DARWIN_CONDITIONALS -# LB_SUNOS_CONDITIONALS - -LN_CONDITIONALS -LC_CONDITIONALS -]) - -# -# LB_CONFIGURE -# -# main configure steps -# -AC_DEFUN([LB_CONFIGURE], -[LB_CANONICAL_SYSTEM - -LB_INCLUDE_RULES - -LB_CONFIG_CRAY_XT3 -LB_CONFIG_BGL -LB_CONFIG_UOSS -LB_PATH_DEFAULTS - -LB_PROG_CC - -LB_PATH_LIBSYSIO -LB_PATH_SNMP - -LB_CONFIG_DOCS 
-LB_CONFIG_UTILS -LB_CONFIG_TESTS -LC_CONFIG_CLIENT_SERVER - -# three macros for cmd3 -LC_CONFIG_SPLIT -LC_CONFIG_LDISKFS -LN_CONFIG_CDEBUG - -LB_CONFIG_MODULES - -LC_CONFIG_LIBLUSTRE -LN_CONFIGURE - -LC_CONFIGURE - -if test "$SNMP_DIST_SUBDIR" ; then - LS_CONFIGURE -fi - -LB_CONDITIONALS -LB_CONFIG_HEADERS - -AC_CONFIG_FILES( -[Makefile:build/Makefile.in.toplevel] -[autoMakefile -build/autoMakefile -build/autoconf/Makefile -build/Rules -build/lustre.spec -]) - -LN_CONFIG_FILES -LC_CONFIG_FILES -if test "$SNMP_DIST_SUBDIR" ; then - LS_CONFIG_FILES -fi - -AC_SUBST(ac_configure_args) - -MOSTLYCLEANFILES='.*.cmd .*.flags *.o *.ko *.mod.c .depend .*.1.*' -AC_SUBST(MOSTLYCLEANFILES) - -AC_OUTPUT - -cat <<_ACEOF - -CC: $CC -LD: $LD -CPPFLAGS: $CPPFLAGS -LLCPPFLAGS: $LLCPPFLAGS -CFLAGS: $CFLAGS -EXTRA_KCFLAGS: $EXTRA_KCFLAGS -LLCFLAGS: $LLCFLAGS - -Type 'make' to build Lustre. -_ACEOF -]) diff --git a/build/autogen.sh b/build/autogen.sh deleted file mode 100644 index 94e9ad3e84970250d70aeaf7e1e67249fa71f680..0000000000000000000000000000000000000000 --- a/build/autogen.sh +++ /dev/null @@ -1,116 +0,0 @@ -#!/bin/bash - -# taken from gnome-common/macros2/autogen.sh -compare_versions() { - ch_min_version=$1 - ch_actual_version=$2 - ch_status=0 - IFS="${IFS= }"; ch_save_IFS="$IFS"; IFS="." - set $ch_actual_version - for ch_min in $ch_min_version; do - ch_cur=`echo $1 | sed 's/[^0-9].*$//'`; shift # remove letter suffixes - if [ -z "$ch_min" ]; then break; fi - if [ -z "$ch_cur" ]; then ch_status=1; break; fi - if [ $ch_cur -gt $ch_min ]; then break; fi - if [ $ch_cur -lt $ch_min ]; then ch_status=1; break; fi - done - IFS="$ch_save_IFS" - return $ch_status -} - -error_msg() { - echo "$cmd is $1. version $required is required to build Lustre." - - if [ -e /usr/lib/autolustre/bin/$cmd ]; then - cat >&2 <<-EOF - You apparently already have Lustre-specific autoconf/make RPMs - installed on your system at /usr/lib/autolustre/share/$cmd. 
- Please set your PATH to point to those versions: - - export PATH="/usr/lib/autolustre/bin:\$PATH" - EOF - else - cat >&2 <<-EOF - CFS provides RPMs which can be installed alongside your - existing autoconf/make RPMs, if you are nervous about - upgrading. See - - ftp://ftp.lustre.org/pub/other/autolustre/README.autolustre - - You may be able to download newer version from: - - http://ftp.gnu.org/gnu/$tool/$tool-$required.tar.gz - EOF - fi - [ "$cmd" = "autoconf" -a "$required" = "2.57" ] && cat >&2 <<EOF - -or for RH9 systems you can use: - -ftp://fr2.rpmfind.net/linux/redhat/9/en/os/i386/RedHat/RPMS/autoconf-2.57-3.noarch.rpm -EOF - [ "$cmd" = "automake-1.7" -a "$required" = "1.7.8" ] && cat >&2 <<EOF - -or for RH9 systems you can use: - -ftp://fr2.rpmfind.net/linux/fedora/core/1/i386/os/Fedora/RPMS/automake-1.7.8-1.noarch.rpm -EOF - exit 1 -} - -check_version() { - local tool - local cmd - local required - local version - - tool=$1 - cmd=$2 - required=$3 - echo -n "checking for $cmd $required... " - if ! $cmd --version >/dev/null ; then - error_msg "missing" - fi - version=$($cmd --version | awk "/$tool \(GNU/ { print \$4 }") - echo "found $version" - if ! compare_versions "$required" "$version" ; then - error_msg "too old" - fi -} - -echo "Checking for a complete tree..." -# required directories -for dir in build lnet lustre ; do - if [ ! -d "$dir" ] ; then - cat >&2 <<EOF -Your tree seems to be missing $dir. -Please read README.lustrecvs for details. -EOF - exit 1 - fi - ACLOCAL_FLAGS="$ACLOCAL_FLAGS -I $PWD/$dir/autoconf" -done -# some are optional -for dir in snmp portals; do - if [ -d "$dir" ] ; then - ACLOCAL_FLAGS="$ACLOCAL_FLAGS -I $PWD/$dir/autoconf" - fi -done - -check_version automake automake-1.7 "1.7.8" -check_version autoconf autoconf "2.57" - -echo "Running aclocal-1.7 $ACLOCAL_FLAGS..." -aclocal-1.7 $ACLOCAL_FLAGS -echo "Running autoheader..." -autoheader -echo "Running automake-1.7..." -automake-1.7 -a -c -echo "Running autoconf..." 
-autoconf - -if [ -d libsysio ] ; then - pushd libsysio >/dev/null - echo "Running autogen for libsysio..." - sh autogen.sh - popd >/dev/null -fi diff --git a/build/branch.sh b/build/branch.sh deleted file mode 100755 index 88c0e420c5f79278c0318271d4e4bb722044bad1..0000000000000000000000000000000000000000 --- a/build/branch.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash -e -CVS=${CVS:-cvs} - -progname=${0##*/} - -if [ $# -lt 2 -o $# -gt 3 ]; then - echo "This creates a new branch in CVS. Usage: $progname parent child <dir>" - exit -fi - -parent=$1 -child=$2 -CHILD=`echo $child | sed -e "s/^b_//" | tr "[a-z]" "[A-Z]"` -dir=${3:-.} -if [ ! -d $dir ]; then - echo >&2 "${progname}: directory '$dir' does not exist." - exit 1 -fi -module=$(basename $(<$dir/CVS/Repository)) - -if [ "$module" = "lustre" ]; then - echo >&2 "${progname}: You probably want to branch lustre or lnet." - echo >&2 "${progname}: Try using ${0} $parent $child lustre" - exit 1 -fi - -case $parent in - HEAD) : ;; - b_*|b1*) : ;; - *) parent="b_$parent" ;; -esac -case $child in - HEAD) : ;; - b_*|b1*) : ;; - *) child="b_$child" -esac - -if [ "$parent" != "HEAD" -a -f $dir/CVS/Tag ]; then - # put in separate condition as bash evaluates all conditions unlike C - if [ "`cat $dir/CVS/Tag`" != "T$parent" ]; then - echo "This script must be run within the $parent branch" - exit 1 - fi -fi - -echo parent: $parent CHILD: $CHILD child: $child date: $date - -echo -n "tagging $parent as '${CHILD}_BASE' ...." -$CVS rtag -r $parent ${CHILD}_BASE $module -echo "done" -echo -n "branching $child at ${CHILD}_BASE' ...." -$CVS rtag -b -r ${CHILD}_BASE $child $module -echo -n "updating $dir to $child ...." 
-$CVS update -r $child $dir -echo "done" diff --git a/build/buildcvs b/build/buildcvs deleted file mode 100644 index 43a211228fd9f9b8f275f8312c0e614acc4c8b20..0000000000000000000000000000000000000000 --- a/build/buildcvs +++ /dev/null @@ -1,182 +0,0 @@ -# This file is sourced by lustre/lustrecvs - -portalstag="" -lnettag="HEAD" -libsysiotag="HEAD" -snmptag="HEAD" - -export LC_COLLATE=C - -case "$lustretag" in - '') - warn "a lustretag is required." - usage >&2 - exit 1 - ;; - --help | -h) - usage - exit 0 - ;; - - # this is the branch table - # keep this list sorted alphabetically! - - # Note these are "specials" -- branches using lnet HEAD don't need an - # entry here. - - b1_2) - portalstag="b1_2" - lnettag="" - libsysiotag="" - snmptag="" - ;; - - b1_4_atime_update) - portalstag="b_hd_newconfig" - ;; - - b1_4_join) - portalstag="b_hd_newconfig" - ;; - - b1_4_lfs_df) - portalstag="b_hd_newconfig" - ;; - - b1_4_lov_lvb_cleanup) - portalstag="b_hd_newconfig" - ;; - - b1_4_next_recovery_transno) - portalstag="b_hd_newconfig" - ;; - - b_new_cmd) - portalstag="b_new_portals" - # lnettag="b_lnet_tmp" - ;; - - b_cmd*) - portalstag="$lustretag" - ;; - - b_iam*) - portalstag="b_hd_newconfig" - # XXX temorary tag until b_iam* is updated from b1_4 liblustre - libsysiotag="HEAD_RELEASE_1_4_6_LAND_PARENT_20060223_1455" - ;; - - b_ioprovement) - portalstag="b_ioprovement" - ;; - - b_newconfig_rdmarouting) - portalstag="b_hd_newconfig" - lnettag="b_newconfig_rdmarouting" - lustretag="b1_4" - ;; - - b_port_ahead) - portalstag="b_port_ahead" - ;; - - b_port_netid) - portalstag="b_port_netid" - ;; - - # b_port_step is only for portals - b_port_step) - portalstag="b_port_step" - lustretag="HEAD" - ;; - - b_port_test) - portalstag="b_port_test" - ;; - - b_ptl_smallfix) - portalstag="b_ptl_smallfix" - lustretag="b1_4" - ;; - - b_ptlrpc_cleanup) - portalstag="b_ptlrpc_cleanup" - ;; - - b_self_test) - # lnet self test development branch - lnettag="b_self_test" - lustretag="HEAD" - ;; - 
- # before 1_4_6, we didn't have lnet or snmp - b_release_1_2_*|b_release_1_4_[0-5]) - portalstag="$lustretag" - lnettag="" - libsysiotag="$lustretag" - snmptag="" - ;; - - b_release_1_4_6-patchless) - portalstag=b_release_1_4_6 - lnettag=b_release_1_4_6-patchless - libsysiotag=b_release_1_4_6 - snmptag=b_release_1_4_6 - ;; - - b_release_1_4_6) - portalstag="$lustretag" - lnettag="$lustretag" - libsysiotag="$lustretag" - snmptag="$lustretag" - ;; - - # all later b_release_* tags - b_release_*) - lnettag="$lustretag" - libsysiotag="$lustretag" - snmptag="$lustretag" - ;; - - # uOSS - b_uoss) - lnettag="b_uoss" - ;; - - # v1.0-v1.3, v1.4.0-v1.4.2 - v1_[0-3]_*|v1_4_[0-2]|v1_4_[0-2]_*) - portalstag="$lustretag" - lnettag="" - libsysiotag="$lustretag" - snmptag="" - ;; - - # v1.4.3-v1.4.5, v1.4.5.1 - v1.4.5.9 - v1_4_[3-5]|v1_4_[3-4]_*|v1_4_5_[1-9]|cray_2005*) - portalstag="$lustretag" - lnettag="" - libsysiotag="$lustretag" - snmptag="$lustretag" - ;; - - # v1.4.6, v1.4.6.[1-91] - v1_4_6_[1-9]|v1_4_6_9[01]) - portalstag="$lustretag" - lnettag="" - libsysiotag="$lustretag" - snmptag="$lustretag" - ;; - - # all later v* tags - v[1-9]*) - lnettag="$lustretag" - libsysiotag="$lustretag" - snmptag="$lustretag" - ;; -esac - -cvs_cmd libsysio libsysio "$libsysiotag" -cvs_cmd portals portals "$portalstag" -cvs_cmd lnet lnet "$lnettag" -cvs_cmd snmp lustre-snmp "$snmptag" -cvs_cmd lustre lustre-core "$lustretag" diff --git a/build/cvsdiffclient b/build/cvsdiffclient deleted file mode 100644 index 66cd6b6e1fb732dcc489920950aa4a76039925f0..0000000000000000000000000000000000000000 --- a/build/cvsdiffclient +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -# Put this script and cvs-modified-files.pl into your PATH (~bin is good) and -# -# export CVSEDITOR=cvsdiffclient -# -# in your .bashrc and you will get a nice bunch of CVS commit reminders: -# <merge/land tag information> -# b=<bug_number> -# i=<inspected_by> -# i=<inspected_by> -# -# Remember to remove the leading "CVS: " part of 
the comment before saving -# your commit comment if you want those entries to be saved. - -[ -f .mergeinfo ] && . ./.mergeinfo - -CVS_MODIFIED_FILES_PL=$(which cvs-modified-files.pl 2>/dev/null) -if [ -z "$CVS_MODIFIED_FILES_PL" ]; then - CVS_MODIFIED_FILES_PL=$(dirname $0)/cvs-modified-files.pl -fi -FILES=$($CVS_MODIFIED_FILES_PL $1) -TMP=`mktemp /tmp/cvslog-XXXXXXXX` -if [ -f $TMP ]; then - [ -f .mergeinfo ] && . .mergeinfo - [ -z "$PARENT" -a -f lustre/.mergeinfo ] && . lustre/.mergeinfo - if [ "$PARENT" ]; then - [ "$OPERATION" ] || OPERATION=Update - [ "$OPERWHERE" ] || OPERWHERE=from - echo "CVS: $OPERATION $child $OPERWHERE $parent ($date)" >> $TMP - else - [ -r CVS/Tag ] && TAG=CVS/Tag - [ -z "$TAG" -a -r lustre/CVS/Tag ] && TAG=lustre/CVS/Tag - [ "$TAG" ] && BRANCH="`sed 's/^T//' $TAG`" || BRANCH="HEAD" - echo "CVS: Branch $BRANCH" >> $TMP - fi -cat - >> $TMP <<- EOF - CVS: Remove "CVS:" from start of lines that should be in commit message - CVS: did you update the ChangeLog for a bug fix? - CVS: did you update the hours spent in Bugzilla? - CVS: did you verify/update the HLD/DLD in CVS? - CVS: b=<bug> - CVS: i=<inspected_by> - CVS: i=<inspected_by> -EOF - - cat $1 >> $TMP - cp $TMP $1 - rm $TMP -fi - -if [ "${FILES:+have_files}"x = have_filesx ]; then - echo Diffing $1 : $FILES - cvs diff -wbBup $FILES 2>/dev/null | sed "s/^/CVS: /" >>$1 -fi -#gnuclient $1 || vi $1 -[ "$EDITOR" ] || EDITOR=vi - -$EDITOR $1 diff --git a/build/cvsrc b/build/cvsrc deleted file mode 100644 index 665cd8f759ccbf42b28f284cd7baaac4a5adeb27..0000000000000000000000000000000000000000 --- a/build/cvsrc +++ /dev/null @@ -1,5 +0,0 @@ -cvs -z3 -q -diff -u -p -co -d -P -update -d -P -commit diff --git a/build/kabi b/build/kabi deleted file mode 100755 index a57a46c1f2e4db3cd1eedfeace47823e40a032bf..0000000000000000000000000000000000000000 --- a/build/kabi +++ /dev/null @@ -1,364 +0,0 @@ -#! 
/usr/bin/perl -# kabi - Linux Kernel Application Binary Interface manager -# Copyright (C) 2005 Cluster File Systems, Inc. -# All rights reserved. -# -# Gordon Matzigkeit <gord@clusterfs.com>, 2005-10-21 - -use warnings; -use strict; - -my $VERSION = '0.2'; - -my $CC = $ENV{'CC'} || 'gcc'; -my $LINUX = '/usr/src/linux'; -my $MODE; -my $OUTPUT; -my @ARGS; -my $VERBOSE = 0; - -my $progname = $0; -$progname =~ s/^.*\///; -my $modename = $progname; - -sub usage -{ - my ($status) = @_; - if ($status) { - print STDERR "Try \`$0 --help' for more information\n"; - } else { - print <<EOF; -Usage: [CC=COMPILER] $0 [OPTION]... MODE ARGS... - -Manage binary compatibility between a Linux kernel and kernel modules. - -The CC environment variable specifies the compiler used to build the -kernel and modules. - - --help display this message and exit --o, --output=KABI specify the name of the KABI file created by the - \`module\' mode [default=strip .ko and add .kabi] --v, --verbose give reasons for rejecting KABI matches - --version print version information - --with-linux=DIR set the path to the kernel sources - -MODE and ARGS can be one of the following: - - archive DIR KMOD install a KABI and kernel module in a unique place in DIR - match FILE... print a list of KABI files which are compatible with - the specified kernel and any specified kernel modules - - module KMOD generate a KABI file for the specified kernel module - -Written by Gordon Matzigkeit <gord\@clusterfs.com> for Cluster File Systems. 
-EOF - } - exit $status; -} - -my @args = @ARGV; -while ($#args >= 0) { - if ($args[0] =~ /^--with-l(i(n(u(x)?)?)?)?=(.*)/) { - $LINUX = $5; - } elsif ($args[0] =~ /^--with-l(i(n(u(x)?)?)?)?$/) { - shift @args; - $LINUX = $args[0]; - } elsif ($args[0] =~ /^--h(e(l(p)?)?)?$/) { - usage(0); - } elsif ($args[0] =~ /^--vers(i(o(n)?)?)?$/) { - print "KABI $VERSION\n"; - exit 0; - } elsif ($args[0] eq '-v' || $args[0] =~ /^--verb(o(s(e)?)?)?$/) { - $VERBOSE = 1; - } elsif ($args[0] =~ /^-/) { - print STDERR "$progname: unrecognized option \`$args[0]'\n"; - usage(1); - } elsif (!defined $MODE) { - $MODE = $args[0]; - } else { - push @ARGS, $args[0]; - } - shift @args; -} - - -if (!defined $MODE) { - print STDERR "$progname: you must specify a MODE\n"; - usage(1); -} - -$modename .= ": $MODE"; -if ($MODE eq 'archive') { - if ($#ARGS != 1) { - print STDERR "$modename: you must specify a DIR and KMOD\n"; - usage(1); - } - - my $ARCHIVE = $ARGS[0]; - my $KMOD = $ARGS[1]; - - my $KABI = $KMOD; - $KABI =~ s/\.k?o$//; - $KABI .= '.kabi'; - - open(MD5SUM, "md5sum $KABI|") or - die "$modename: cannot execute \`md5sum': $!\n"; - my $hash = <MD5SUM>; - close(MD5SUM); - $hash =~ s/\s+.*//s; - - my $TAG = ''; - if (-d 'CVS') { - open(TAG, '<CVS/Tag') or - die "$modename: cannot read \`CVS/Tag': $!\n"; - $TAG = <TAG>; - close(TAG); - chomp $TAG; - $TAG = "/$TAG"; - } - - my ($dir, @sh_c, @cp); - if ($ARCHIVE =~ /^([^:][^:]+):(.*)$/) { - $dir = $2; - @sh_c = ('ssh', '-o', 'BatchMode=yes', $1); - @cp = ('scp', '-B'); - } else { - $dir = $ARCHIVE; - @sh_c = ('sh', '-c'); - @cp = ('cp'); - } - - system(@sh_c, "test -d $dir"); - if ($? >> 8 != 0) { - print STDERR "$modename: warning: \`$dir' is not reachable or does not exist\n"; - exit 0; - } - - print "archiving $KMOD in $ARCHIVE$TAG/$KMOD/$hash\n" - if $VERBOSE; - foreach my $d ("$dir$TAG", "$dir$TAG/$KMOD", "$dir$TAG/$KMOD/$hash") { - system(@sh_c, "test -d $d || mkdir $d"); - if ($? >> 8 != 0) { - exit $? 
>> 8; - } - } - system(@cp, $KMOD, $KABI, "$ARCHIVE$TAG/$KMOD/$hash"); - exit $? >> 8; -} elsif ($MODE eq 'module') { - - if ($#ARGS != 0) { - print STDERR "$modename: you must specify exactly one KMOD\n"; - usage(1); - } - - my $KMOD = $ARGS[0]; - - if (!defined $OUTPUT) { - $OUTPUT = $KMOD; - $OUTPUT =~ s/\.k?o$//; - $OUTPUT .= '.kabi'; - } - print "create $OUTPUT\n" if $VERBOSE; - open(OUT, ">$OUTPUT") or - die "$modename: cannot create \`$OUTPUT': $!\n"; - - my $outname = $OUTPUT; - $outname =~ s/^.*\///; - print OUT <<EOF; -# $outname - Kernel module ABI descriptor file -# DO NOT EDIT - Automatically generated by $progname $VERSION -EOF - - # Get the kernel version. - print OUT "kver " . kernel_version() . "\n"; - - # Gather the undefined symbols with version numbers from the - # kernel module. - my %vers; - my @undefs; - - # Gather the version numbers, if any. - my $modfile = $KMOD; - if ($modfile =~ s/\.ko$/.mod.c/) { - open(MOD, "<$modfile") or - die "$modename: cannot read \`$modfile': $!\n"; - my $versions = 0; - while ($_ = <MOD>) { - if (/\"__versions\"/) { - $versions = 1; - } elsif ($versions) { - if (/^\s*\{\s*(0x[0-9a-f]+)\s*,\s*\"([^\"]*)\"\s*\}\s*,\s*$/) { - $vers{$2} = $1; - push(@undefs, $2); - } elsif (/^\s*\}\s*;\s*$/) { - $versions = 0; - } - } - } - close(MOD); - } else { - open(NM, "nm $KMOD |") or - die "$modename: cannot execute \`nm $KMOD': $!\n"; - while ($_ = <NM>) { - if (/^\s*U\s*(.*\S)\s*$/) { - push @undefs, $1; - } - } - close(NM); - } - - foreach my $undef (sort @undefs) - { - print OUT "usym $undef"; - if (defined $vers{$undef}) { - print OUT " ", $vers{$undef}; - } - print OUT "\n"; - } - - close(OUT) or - die "$modename: cannot write \`$OUTPUT': $!\n"; - -} elsif ($MODE eq 'match') { - my @KABIS; - my @KMODS; - - my @todo = @ARGS; - while ($#todo >= 0) { - my $t = shift @todo; - if ($t =~ /\.kabi$/) { - push @KABIS, $t; - } elsif (-d $t) { - # Add all the contents of the directory to our todo list. 
- opendir(DIR, $t); - while (my $ent = readdir(DIR)) { - if ($ent =~ /^\./) { - # Skip dotfiles. - } elsif (-d "$t/$ent") { - # Recurse into subdirectories. - unshift @todo, "$t/$ent"; - } elsif ($ent =~ /\.k?o$/) { - # Add kernel modules. - unshift @todo, "$t/$ent"; - } - } - closedir(DIR); - } else { - # It's an explicit kernel module. - push @KMODS, $t; - } - } - - if ($#KABIS < 0) { - print STDERR "$modename: you must specify at least one KABI\n"; - usage(1); - } - - my %dsyms; - - if (-f "$LINUX/Module.symvers") { - # Look up the version numbers in Module.symvers. - open(VERS, "<$LINUX/Module.symvers") or - die "$modename: cannot read \`$LINUX/Module.symvers': $!\n"; - while ($_ = <VERS>) { - if (/^(0x[0-9a-f]+)\s+(\S+)/) { - $dsyms{$2} = hex($1); - } - } - close(VERS); - } else { - # Read in all the non-versioned symbols defined by this kernel. - open(MAP, "<$LINUX/System.map") or - die "$modename: cannot read \`$LINUX/System.map': $!\n"; - while ($_ = <MAP>) { - if (/^[0-9a-fA-F]*\s+[ABCDGIRSTW]+\s*(.*\S)\s*$/) { - $dsyms{$1} = 0; - } - } - close(MAP); - } - - # Find the symbols for the installed modules, too. - foreach my $mod (@KMODS) { - open(NM, "nm $mod |") or - die "$modename: cannot execute \`nm $mod': $!\n"; - while ($_ = <NM>) { - if (/^[0-9a-fA-F]*\s+[ABCDGIRSTW]+\s*(.*\S)\s*$/) { - $dsyms{$1} = 0; - } - } - close(NM); - } - - # Also get the kernel version. - my $kver = kernel_version(); - - # Read each kabi file and print out the ones that are plausible - # matches. - foreach my $kabi (@KABIS) { - open(KABI, "<$kabi") or - die "$modename: cannot read \`$kabi': $!\n"; - my $possible = 1; - while ($possible && ($_ = <KABI>)) { - if (/^\s*#/) { - # Skip comments. 
- } elsif (/^\s*kver\s+(.*\S)\s*$/) { - my $modkver = $1; - if ($modkver ne $kver) { - print STDERR "$kabi:$.: module version \`$modkver' differs from \`$kver'\n" - if $VERBOSE; - $possible = 0; - } - } elsif (/^\s*usym\s+(\S+)\s*(\S+)?\s*$/) { - my ($modsym, $symver) = ($1, hex($2)); - if (!defined $dsyms{$modsym}) { - print STDERR "$kabi:$.: module symbol \`$modsym' is not defined\n" - if $VERBOSE; - $possible = 0; - } elsif (defined $symver && $dsyms{$modsym} != 0 && $dsyms{$modsym} != $symver) { - printf STDERR "$kabi:$.: module symbol \`$modsym' is version 0x%x, not 0x%x\n", $dsyms{$modsym}, $symver - if $VERBOSE; - $possible = 0; - } - } elsif (/^\s*(\S+)/) { - print STDERR "$kabi:$.: unrecognized descriptor line \`$1'\n"; - } - } - close(KABI); - - if ($possible) { - # We got a match. - print "$kabi\n"; - } - } - -} else { - print STDERR "$progname: unrecognized mode \`$MODE'\n"; - usage(1); -} - - -# Read the kernel version from its built source tree. -sub kernel_version -{ - my $verfile = "$LINUX/include/linux/version.h"; - open(VERSION, "<$verfile") or - die "$modename: cannot read \`$verfile': $!\n"; - - my $ver; - while ($_ = <VERSION>) { - if (/^\s*#\s*define\s+UTS_RELEASE\s+"(.*)"\s*$/) { - $ver = $1; - last; - } - } - - close(VERSION); - - if (!defined $ver) { - die "$modename: cannot find UTS_RELEASE in \`$verfile'\n"; - } - return "linux-$ver"; -} - -exit 0; diff --git a/build/land1.sh b/build/land1.sh deleted file mode 100755 index ef875e64b2a62c94c7582ee0ddb9a48d0c448caa..0000000000000000000000000000000000000000 --- a/build/land1.sh +++ /dev/null @@ -1,119 +0,0 @@ -#!/bin/bash -e - -progname=${0##*/} - -CONFLICTS=cvs-merge-conflicts -CVS=cvs - -if [ -f .mergeinfo ] ; then - echo ".mergeinfo exists - clean up first" - exit -fi - -if [ -f $CONFLICTS ] ; then - echo "$CONFLICTS exists - clean up first" - exit -fi - -if [ $# -lt 2 -o $# -gt 3 ]; then - echo "This is phase 1 of merging branches. 
Usage: $0 parent child [dir]" - exit -fi - -parent=$1 -PARENT=`echo $parent | sed -e "s/^b_//" | tr "[a-z]" "[A-Z]"` -child=$2 -CHILD=`echo $child | sed -e "s/^b_//" | tr "[a-z]" "[A-Z]"` -date=`date +%Y%m%d_%H%M` - -dir=${3:-.} -module=$(basename $(<$dir/CVS/Repository)) - -if [ "$module" = "lustre" ] ; then - echo >&2 "${progname}: You probably want to land lustre or lnet, not the whole tree." - echo >&2 "${progname}: Try using ${0} $parent $child lustre" - exit 1 -fi - -case $parent in - HEAD) : ;; - b_*|b[1-4]*) : ;; - *) parent="b_$parent" ;; -esac -case $child in - HEAD) : ;; - b_*|b[1-4]*) : ;; - *) child="b_$child" -esac - -if [ "$parent" != "HEAD" -a "`cat $dir/CVS/Tag 2> /dev/null`" != "T$parent" ]; then - echo "${progname}: this script must be run within the $parent branch" - exit 1 -fi - -TEST_FILE=${TEST_FILE:-ChangeLog} # does this need to be smarter? -check_tag() { - [ -z "$1" ] && echo "check_tag() missing arg" && exit3 - [ "$1" = "HEAD" ] && return - $CVS log ${dir%%/*}/$TEST_FILE 2> /dev/null | grep -q " $1: " && return - echo "${progname}: tag $1 not found in $dir/$TEST_FILE" - exit 2 -} - -check_tag $child -check_tag ${CHILD}_BASE - -cat << EOF > ".mergeinfo" -parent=$parent -PARENT=$PARENT -child=$child -CHILD=$CHILD -date=$date -module=$module -dir=$dir -CONFLICTS=$CONFLICTS -OPERATION=Land -OPERWHERE=onto -EOF - -echo PARENT $PARENT parent $parent CHILD $CHILD child $child date $date - -# Update your tree to the PARENT branch; HEAD is not really a branch, so you -# need to update -A instead of update -r HEAD, or the commit will fail. -p -echo -n "Updating to $parent ...." -if [ $parent == "HEAD" ]; then - $CVS update -AdP $dir -else - $CVS update -r $parent -dP $dir -fi -echo "done" - -echo -n "Create land-to point on $parent as ${PARENT}_${CHILD}_LAND_PARENT_$date ..." -$CVS rtag -r $parent ${PARENT}_${CHILD}_LAND_PARENT_$date $module -echo "done" - -echo -n "Create land-from point on ${child} ${PARENT}_${CHILD}_LAND_CHILD_$date ..." 
-$CVS rtag -r ${child} ${PARENT}_${CHILD}_LAND_CHILD_$date $module -echo "done" - -echo -n "Preserve old base tag on $parent ${CHILD}_BASE as ${CHILD}_BASE_PREV ..." -$CVS rtag -F -r ${CHILD}_BASE ${CHILD}_BASE_PREV $module -echo "done" - -# Apply all of the changes to your local tree: -echo -n "Updating as -j ${CHILD}_BASE -j ${PARENT}_${CHILD}_LAND_CHILD_$date ..." -$CVS update -j ${CHILD}_BASE -j ${PARENT}_${CHILD}_LAND_CHILD_$date $dir -echo "done" - -echo -n "Recording conflicts in $CONFLICTS ..." -$CVS update | awk '/^C/ { print $2 }' > $CONFLICTS -if [ -s $CONFLICTS ] ; then - echo "Conflicts found, fix before committing." - cat $CONFLICTS -else - echo "No conflicts found" - rm -f $CONFLICTS -fi -echo "done" - -echo "Build, test, commit and then run land2.sh (no arguments)" diff --git a/build/lbuild b/build/lbuild deleted file mode 100755 index 7714d62ebe9c7f27ac468f96f8bfd54a21689063..0000000000000000000000000000000000000000 --- a/build/lbuild +++ /dev/null @@ -1,726 +0,0 @@ -#!/bin/sh - -# vim:expandtab:shiftwidth=4:softtabstop=4:tabstop=4: - -TOPDIR=$PWD - -# CVSROOT is inherited from the environment -KERNELDIR= -LINUX= -LUSTRE= -RELEASE=0 -DO_SRC=0 -DOWNLOAD=1 -TAG= -TARGET= -TARGET_ARCHS= -CONFIGURE_FLAGS= -EXTERNAL_PATCHES= -EXTRA_VERSION= -STAGEDIR= -TMPDIR=${TMPDIR:-"/var/tmp"} - -# from target file -KERNEL= -SERIES= -CONFIG= -VERSION= - -RHBUILD=0 -SUSEBUILD=0 -LINUX26=0 -SUSEBUILD=0 - -BASE_ARCHS= -BIGMEM_ARCHS= -BOOT_ARCHS= -JENSEN_ARCHS= -SMP_ARCHS= -BIGSMP_ARCHS= -PSERIES64_ARCHS= -UP_ARCHS= - -DATE=$(date) - -USE_DATESTAMP=1 -RPMBUILD= - -export CC=${CC:-gcc} - -# Readlink is not present on some older distributions: emulate it. 
-readlink() { - local path=$1 ll - - if [ -L "$path" ]; then - ll="$(LC_ALL=C ls -l "$path" 2> /dev/null)" && - echo "${ll/* -> }" - else - return 1 - fi -} - -cleanup() -{ - true -} - -error() -{ - [ "$1" ] && echo -e "\n${0##*/}: $1" -} - -fatal() -{ - cleanup - error "$2" - exit $1 -} - -is_release() -{ - (( $RELEASE )) || return 0 -} - -list_targets() -{ - echo -n "Available targets:" - for target in $TOPDIR/lustre/lustre/kernel_patches/targets/*.target ; do - target_file=${target##*/} - echo -n " ${target_file%%.target}" - done - echo -} - -usage() -{ - cat <<EOF -Usage: ${0##*/} [OPTION]... [-- <lustre configure options>] - - -d CVSROOT - Specifies the CVS Root to use when pulling files from CVS. The - environment variable \$CVSROOT is used if this option is not - present. - - --external-patches=EXTERNAL_PATCHES - Directory similar to lustre/lustre/kernel_patches/ that lbuild should - look for seres and config files in before looking in the lustre - tree. - - --extraversion=EXTRAVERSION - Text to use for the rpm release and kernel extraversion. - - --kerneldir=KERNELDIR - Directory containing Linux source tarballs referenced by target - files. - - --linux=LINUX - Directory of Linux kernel sources. When this option is used, only - Lustre modules and userspace are built. - - --lustre=LUSTRE - Path to an existing lustre source tarball to use instead of - pulling from CVS. - - --nodownload - Do not try to download a kernel from ftp.lustre.org - - --nosrc - Do not build a .src.rpm, a full kernel patch, or a patched kernel - tarball. - - --publish - Unused. - - --release - Specifies that the files generated do not include timestamps, and - that this is an official release. - - --src - Build a .src.rpm, a full kernel patch, and a patched kernel tarball. - - --stage=DIR - Directory used to stage packages for release. RPMs will be placed - more or less in DIR/<target>-<arch>, and the tarball will be - placed in DIR. 
- - --tag=TAG - A CVS branch/tag name to build from when pulling from CVS. - - --target=TARGET - The name of the target to build. The available targets are listed - below. - - --target-archs=TARGET_ARCHS - A (space delimited) list of architectures to build. By default, - all of the archs supported by the TARGET will be built, in - addition to a .src.rpm. This option can limit those, for machines - that can only build certain archs or if you only want a certain - arch built (for testing, or a one-off kernel). - - Also note that by using a non-"base" arch (eg, i386) only kernels - will be built - there will be no lustre-lite-utils package. - - --disable-datestamp - Prevents the datestamp flag (-D) from being passed to cvs for - checkouts. This is a workaround for a problem encountered when - using lbuild with tinderbox. - -EOF - -# list_targets - - fatal "$1" "$2" -} - -check_options() -{ - if [ "$LUSTRE" ] ; then - [ -r "$LUSTRE" ] || \ - usage 1 "Could not find Lustre source tarball '$LUSTRE'." - else - [ "$CVSROOT" ] || \ - usage 1 "Either specify a CVS Root with -d, or a Lustre source tarball with --lustre." - [ "$TAG" ] || \ - usage 1 "A branch/tag name must be specified with --tag when not building from a tarball." - fi - - if [ -z "$LINUX" ] ; then - [ "$KERNELDIR" ] || \ - usage 1 "A kernel directory must be specified with --kerneldir." - - [ -d "$KERNELDIR" ] || \ - usage 1 "$KERNELDIR is not a directory." - - if ! (( $RELEASE )) ; then - [ "$TAG" ] || \ - usage 1 "When building a snapshot, a tag name must be used." - fi - - [ "$TARGET" ] || usage 1 "A target must be specified with --target." -# TARGET_FILE="$TOPDIR/lustre/kernel_patches/targets/$TARGET.target" -# [ -r "$TARGET_FILE" ] || \ -# usage 1 "Target '$TARGET' was not found." 
- fi - - case $TARGET in - 2.6-rhel4) - CANONICAL_TARGET="rhel-2.6" - ;; - 2.6-suse) - CANONICAL_TARGET="sles-2.6" - ;; - 2.6-sles10) - CANONICAL_TARGET="sles10-2.6" - ;; - hp_pnnl-2.4) - CANONICAL_TARGET="hp-pnnl-2.4" - ;; - 2.6-vanilla \ - | suse-2.4.21-2 \ - | rh-2.4 \ - | rhel-2.4 \ - | sles-2.4 \ - | 2.6-patchless) - CANONICAL_TARGET="$TARGET" - ;; - esac - - TIMESTAMP=$(date -d "$DATE" "+%Y%m%d%H%M") - - RPMBUILD=$(which rpmbuild 2>/dev/null | head -1) - if [ ! "$RPMBUILD" -o "$RPMBUILD" == "" ]; then - RPMBUILD=$(which rpm 2>/dev/null | head -1) - if [ ! "$RPMBUILD" -o "$RPMBUILD" == "" ]; then - usage 1 "Could not find binary for making rpms (tried rpmbuild and rpm)." - fi - fi -} - -uniqify() -{ - echo $(echo "$*" | xargs -n 1 | sort -u) -} - -build_tarball() { - local TARGET=$1 - local SRPM=$2 - - if [ "$TARGET" = "rhel-2.6" -o "$TARGET" = "rhel-2.4" ]; then - local SPEC="" - if [ "$TARGET" = "rhel-2.6" ]; then - SPEC=kernel-2.6.spec - OLDCONFIG=nonint_oldconfig - elif [ "$TARGET" = "rhel-2.4" ]; then - SPEC=kernel-2.4.spec - OLDCONFIG=oldconfig - fi - - RPMTOPDIR=$(mktemp -d $KERNELDIR/rpm_XXXXXX) - mkdir $RPMTOPDIR/BUILD/ - rpm -ivh $KERNELDIR/$SRPM --define "_topdir $RPMTOPDIR" || \ - { rm -rf $RPMTOPDIR; fatal 1 "Error installing kernel SRPM."; } - $RPMBUILD -bp --nodeps --target i686 $RPMTOPDIR/SPECS/$SPEC --define "_topdir $RPMTOPDIR" - pushd $RPMTOPDIR/BUILD/kernel-${lnxmaj}/linux-${lnxmaj} && { - make mrproper - cp configs/kernel-${lnxmaj}-i686-smp.config .config - if ! make $OLDCONFIG > /dev/null; then - fatal 1 "error trying to make $OLDCONFIG while building a tarball from SRPM." - fi - make include/linux/version.h - rm -f .config - cd .. - tar cjf $KERNEL_FILE linux-${lnxmaj} - } - popd - rm -rf $RPMTOPDIR - fi -} - -download_and_build_tarball() { - local TARGET=$1 - local KERNEL_FILE=$2 - - local SRPM=kernel-${lnxmaj}-${lnxrel}.src.rpm - - echo "Downloading http://ftp.lustre.org/kernels/$TARGET/old/$SRPM..." - if ! 
wget -nv "http://ftp.lustre.org/kernels/$TARGET/old/$SRPM" \ - -O "$KERNELDIR/$SRPM" ; then - fatal 1 "Could not download target $TARGET's kernel SRPM $SRPM from ftp.lustre.org." - fi - - build_tarball $TARGET $SRPM -} - -load_target() -{ - EXTRA_VERSION_save="$EXTRA_VERSION" - for patchesdir in "$EXTERNAL_PATCHES" "$TOPDIR/lustre/lustre/kernel_patches" ; do - TARGET_FILE="$patchesdir/targets/$TARGET.target" - [ -r "$TARGET_FILE" ] && break - done - [ -r "$TARGET_FILE" ] || \ - fatal 1 "Target $TARGET was not found." - - echo "Loading target config file $TARGET.target..." - - . "$TARGET_FILE" - - [ "$KERNEL" ] || fatal 1 "Target $TARGET did not specify a kernel." - [ "$VERSION" ] || fatal 1 "Target $TARGET did not specify a kernel version." - - if [ "$KERNELDIR" ] ; then - KERNEL_FILE="$KERNELDIR/$KERNEL" - if [ ! -r "$KERNELDIR/$KERNEL" ] ; then - # see if we have an SRPM we can build a tarball for - KERNEL_SRPM=kernel-${lnxmaj}-${lnxrel}.src.rpm - if [ -r "$KERNELDIR/$KERNEL_SRPM" ] ; then - build_tarball $CANONICAL_TARGET $KERNEL_SRPM - else - if (( $DOWNLOAD )) ; then - echo "Downloading http://ftp.lustre.org/kernels/$CANONICAL_TARGET/old/$KERNEL..." - if ! wget -nv "http://ftp.lustre.org/kernels/$CANONICAL_TARGET/old/$KERNEL" -O "$KERNELDIR/$KERNEL" ; then - # see if we can do it with an SRPM from the download site - download_and_build_tarball $CANONICAL_TARGET $KERNEL_FILE - fi - else - fatal 1 "Target $TARGET's kernel file $KERNEL not found in kernel directory $KERNELDIR." - fi - fi - fi - fi - - if [ "$SERIES" ] ; then - for series in $SERIES ; do - for patchesdir in "$EXTERNAL_PATCHES" "$TOPDIR/lustre/lustre/kernel_patches" ; do - [ -r "$patchesdir/series/$series" ] && continue 2 - done - fatal 1 "Target $TARGET's series $SERIES could not be found.\nSearched:\n\t$EXTERNAL_PATCHES/series\n\t$TOPDIR/lustre/lustre/kernel_patches/series." 
- done - fi - - CONFIG_FILE="$TOPDIR/lustre/lustre/kernel_patches/kernel_configs/$CONFIG" - [ -r "$CONFIG_FILE" ] || \ - fatal 1 "Target $TARGET's config file $CONFIG missing from $TOPDIR/lustre/lustre/kernel_patches/kernel_configs/." - - if [ "$EXTRA_VERSION_save" ] ; then - EXTRA_VERSION="$EXTRA_VERSION_save" - elif ! (( $RELEASE )) ; then - # if there is no patch series, then this is not a lustre specific - # kernel. don't make it look like one - if [ -n "$SERIES" ]; then - #remove the @VERSION@ (lustre version) - EXTRA_VERSION=$(echo $EXTRA_VERSION | sed -e "s/\(.*_lustre\)\..*/\1/") - EXTRA_VERSION="${EXTRA_VERSION}-${TAG}.${TIMESTAMP}" - fi - fi - # EXTRA_VERSION=${EXTRA_VERSION//-/_} - - ALL_ARCHS="$BASE_ARCHS $BIGMEM_ARCHS $BOOT_ARCHS $JENSEN_ARCHS $SMP_ARCHS $BIGSMP_ARCHS $PSERIES64_ARCHS $UP_ARCHS" - - BUILD_ARCHS= - for arch in $(uniqify "$ALL_ARCHS") ; do - if [ -z "$TARGET_ARCHS" ] || echo "$TARGET_ARCHS" | grep "$arch" >/dev/null 2>/dev/null ; then - BUILD_ARCHS="$BUILD_ARCHS $arch" - fi - done - [ "$BUILD_ARCHS" ] || usage 1 "No available target archs to build." - echo "Building for: $BUILD_ARCHS" -} - -tarflags() -{ - case "$1" in - '') - fatal 1 "tarflags(): File name argument missing." - ;; - *.tar.gz | *.tgz) - echo 'zxf' - ;; - *.tar.bz2) - echo 'jxf' - ;; - *.tar) - echo 'xf' - ;; - *) - fatal 1 "tarflags(): Unrecognized tar extension in file: $1" - ;; - esac -} - -untar() -{ - echo "Untarring ${1##*/}..." - tar $(tarflags "$1") "$1" -} - -unpack_lustre() -{ - DIRNAME="lustre-$TAG-$TIMESTAMP" - if [ "$LUSTRE" ] ; then - untar "$LUSTRE" - [ -d lustre ] || ln -sf lustre-[0-9].[0-9]* lustre - else - if [ "$USE_DATESTAMP" ]; then - DATESTAMP="-D '$DATE'" - else - DATESTAMP="" - fi - - cvs -d "$CVSROOT" -qz3 co $DATESTAMP -d "$DIRNAME" lustre || \ - fatal 1 "There was an error checking out toplevel Lustre from CVS." - pushd "$DIRNAME" > /dev/null - ./lustrecvs "$TAG" || \ - fatal 1 "There was an error checking out Lustre/Portals/Build from CVS." 
- echo "Creating lustre tarball..." - sh autogen.sh || fatal 1 "There was an error running autogen.sh." - ./configure --disable-{modules,utils,liblustre,tests,doc} || \ - fatal 1 "There was an error running ./configure to create makefiles." - make dist || fatal 1 "There was an error running 'make dist'." - popd > /dev/null - fname=`basename $DIRNAME/lustre-*.tar.gz` - cp $DIRNAME/$fname . || fatal 1 "There was an error copying lustre tarball." - LUSTRE="$PWD/$fname" - ln -sf "$DIRNAME" lustre - fi -} - -unpack_linux() -{ - untar "$KERNEL_FILE" - [ -d linux ] || ln -sf linux* linux -} - -patch_linux() -{ - [ "$SERIES" ] || return 0 - FULL_PATCH="$PWD/lustre-kernel-${TARGET}-${EXTRA_VERSION}.patch" - [ -f "$FULL_PATCH" ] && rm -f "$FULL_PATCH" - pushd linux >/dev/null - for series in $SERIES ; do - echo -n "Applying series $series:" - for patchesdir in "$EXTERNAL_PATCHES" "$TOPDIR/lustre/lustre/kernel_patches" ; do - [ -r "$patchesdir/series/$series" ] || continue - SERIES_FILE="$patchesdir/series/$series" - for patch in $(<"$SERIES_FILE") ; do - echo -n " $patch" - PATCH_FILE="$patchesdir/patches/$patch" - [ -r "$PATCH_FILE" ] || \ - fatal 1 "Patch $patch does not exist in Lustre tree." - cat "$PATCH_FILE" >> "$FULL_PATCH" || \ - fatal 1 "Error adding patch $patch to full patch." - patch -s -p1 < "$PATCH_FILE" || fatal 1 "Error applying patch $patch." - done - break - done - echo - done - popd >/dev/null - echo "Full patch has been saved in ${FULL_PATCH##*/}." - echo "Replacing .config files..." - [ -d linux/configs ] || mkdir linux/configs || \ - fatal 1 "Error creating configs directory." - rm -f linux/configs/* - copysuccess=0 - for patchesdir in "$EXTERNAL_PATCHES" "lustre/lustre/kernel_patches" ; do - [ "$patchesdir" ] && \ - cp -v $patchesdir/kernel_configs/kernel-${VERSION}-${TARGET}*.config linux/configs/ >/dev/null && copysuccess=1 - done - [ "$copysuccess" = "1" ] || \ - fatal 1 "Error copying in kernel configs." 
-} - -pack_linux() -{ - TARBALL="$(readlink linux)-$EXTRA_VERSION.tar.gz" - echo "Creating patched linux tarball $TARBALL..." - tar zcf "$TARBALL" "$(readlink linux)" \ - --exclude "CVS" --exclude ".cvsignore" || \ - --exclude "*.orig" --exclude "*~" --exclude "*.rej" || \ - fatal 1 "Error creating patched Linux tarball." -} - -clean_linux() -{ - [ -d linux ] || return 0 - echo "Cleaning linux..." - [ -L linux ] && rm -rf $(readlink linux) - rm -rf linux -} - -prep_kernel_build() -{ - # make .spec file - ENABLE_INIT_SCRIPTS="" - sed \ - -e "s^@BASE_ARCHS@^$BASE_ARCHS^g" \ - -e "s^@BIGMEM_ARCHS@^$BIGMEM_ARCHS^g" \ - -e "s^@BIGSMP_ARCHS@^$BIGSMP_ARCHS^g" \ - -e "s^@BOOT_ARCHS@^$BOOT_ARCHS^g" \ - -e "s^@CONFIGURE_FLAGS@^$CONFIGURE_FLAGS^g" \ - -e "s^@ENABLE_INIT_SCRIPTS@^$ENABLE_INIT_SCRIPTS^g" \ - -e "s^@JENSEN_ARCHS@^$BOOT_ARCHS^g" \ - -e "s^@KERNEL_EXTRA_VERSION@^$EXTRA_VERSION^g" \ - -e "s^@KERNEL_RELEASE@^${EXTRA_VERSION//-/_}^g" \ - -e "s^@KERNEL_SOURCE@^$KERNEL^g" \ - -e "s^@KERNEL_VERSION@^$VERSION^g" \ - -e "s^@LINUX26@^$LINUX26^g" \ - -e "s^@LUSTRE_SOURCE@^${LUSTRE##*/}^g" \ - -e "s^@LUSTRE_TARGET@^$TARGET^g" \ - -e "s^@PSERIES64_ARCHS@^$PSERIES64_ARCHS^g" \ - -e "s^@RHBUILD@^$RHBUILD^g" \ - -e "s^@SMP_ARCHS@^$SMP_ARCHS^g" \ - -e "s^@SUSEBUILD@^$SUSEBUILD^g" \ - -e "s^@SUSEBUILD@^$SUSEBUILD^g" \ - -e "s^@UP_ARCHS@^$UP_ARCHS^g" \ - < $TOPDIR/lustre/build/lustre-kernel-2.4.spec.in \ - > lustre-kernel-2.4.spec - [ -d SRPMS ] || mkdir SRPMS - [ -d RPMS ] || mkdir RPMS - [ -d BUILD ] || mkdir BUILD - [ -d SOURCES ] || mkdir SOURCES - for script in linux-{rhconfig.h,merge-config.awk,merge-modules.awk} \ - suse-{functions.sh,post.sh,postun.sh,trigger-script.sh.in} \ - sles8-{pre,post,postun,update_{INITRD_MODULES,rcfile_setting}}.sh ; do - cp $TOPDIR/lustre/build/$script SOURCES - done - cp "$LUSTRE" "$KERNEL_FILE" SOURCES - if [ "$EXTERNAL_PATCHES" -a -d "$EXTERNAL_PATCHES" ] ; then - tar zcf SOURCES/external-patches.tar.gz -C "$EXTERNAL_PATCHES" series targets 
patches kernel_configs - else - touch SOURCES/external-patches.tar.gz - fi -} - -clean_lustre() -{ - [ -d lustre ] || return 0 - echo "Cleaning Lustre..." - [ -L lustre ] && rm -rf $(readlink lustre) - rm -rf lustre -} - -build_kernel() -{ - echo "Building kernel + Lustre RPMs for: $BUILD_ARCHS..." - targets= - for arch in $BUILD_ARCHS ; do - targets="--target $arch $targets" - done - - $RPMBUILD $targets -bb lustre-kernel-2.4.spec \ - --define "_tmppath $TMPDIR" \ - --define "_topdir $TOPDIR" || \ - fatal 1 "Error building rpms for $BUILD_ARCHS." - - if (( $DO_SRC )) ; then - $RPMBUILD -bs lustre-kernel-2.4.spec \ - --define "_tmppath $TMPDIR" \ - --define "_topdir $TOPDIR" || \ - fatal 1 "Error building .src.rpm." - fi -} - -build_lustre() -{ - [ -d SRPMS ] || mkdir SRPMS - [ -d RPMS ] || mkdir RPMS - [ -d BUILD ] || mkdir BUILD - [ -d SOURCES ] || mkdir SOURCES - - cp "$LUSTRE" SOURCES - - pushd lustre >/dev/null - - echo "Building Lustre RPMs for: $BUILD_ARCHS..." - targets= - for arch in $BUILD_ARCHS ; do - targets="--target $arch $targets" - done - - ./configure "--with-linux=${LINUX}" ${CONFIGURE_FLAGS} - - $RPMBUILD $targets -bb build/lustre.spec \ - --define "_tmppath $TMPDIR" \ - --define "_topdir $TOPDIR" || \ - fatal 1 "Error building rpms for $BUILD_ARCHS." - - popd >/dev/null -} - -stage() -{ - [ "$STAGEDIR" ] || return 0 - - for arch in $BUILD_ARCHS ; do - rpmdir="${STAGEDIR}/${CANONICAL_TARGET}-${arch}" - echo "${0##*/}: Copying RPMs into ${rpmdir}" - mkdir -p "${rpmdir}" - cp -v RPMS/${arch}/*.rpm "${rpmdir}" - if [ -d RPMS/noarch ] ; then - cp -v RPMS/noarch/*.rpm "${rpmdir}" - fi - done - - cp -v "$LUSTRE" "$STAGEDIR" -} - -[ -r ~/.lbuildrc ] && . ~/.lbuildrc - -options=$(getopt -o d:D:h -l disable-datestamp,external-patches:,extraversion:,kerneldir:,linux:,lustre:,nodownload,nosrc,publish,release,src,stage:,tag:,target:,target-archs:,with-linux: -- "$@") - -if [ $? 
!= 0 ] ; then - usage 1 -fi - -eval set -- "$options" - -while [ "$1" ] ; do - case "$1" in - '') - usage 1 - ;; - -d) - CVSROOT=$2 - shift 2 - ;; - -D) - DATE=$2 - shift 2 - ;; - --external-patches) - EXTERNAL_PATCHES=$2 - shift 2 - ;; - --extraversion) - EXTRA_VERSION=$2 - shift 2 - ;; - --help | -h) - usage 0 - ;; - --kerneldir) - KERNELDIR=$2 - shift 2 - ;; - --linux | --with-linux) - LINUX=$2 - shift 2 - ;; - --lustre) - LUSTRE=$2 - shift 2 - ;; - --nodownload) - DOWNLOAD=0 - shift 1 - ;; - --nosrc) - DO_SRC=0 - shift 1 - ;; - --publish) - shift - ;; - --release) - RELEASE=1 - shift - ;; - --src) - DO_SRC=1 - shift 1 - ;; - --stage) - STAGEDIR=$2 - shift 2 - ;; - --tag) - TAG=$2 - shift 2 - ;; - --target) - TARGET=$2 - shift 2 - ;; - --target-archs) - TARGET_ARCHS=$2 - shift 2 - ;; - --disable-datestamp) - USE_DATESTAMP= - shift - ;; - --) - shift - CONFIGURE_FLAGS=$@ - break - ;; - *) - usage 1 "Unrecognized option: $1" - ;; - esac -done - -check_options - -unpack_lustre - -# prep_build needs the .spec.in from the lustre source -if [ -z "$LINUX" ] ; then - load_target - if (( $DO_SRC )) ; then - unpack_linux - patch_linux - pack_linux - clean_linux - fi - - prep_kernel_build - clean_lustre - - build_kernel -else - build_lustre -fi - -stage diff --git a/build/linux-rhconfig.h b/build/linux-rhconfig.h deleted file mode 100644 index a7aa42442cafafc4f17daba132e5590e169417b6..0000000000000000000000000000000000000000 --- a/build/linux-rhconfig.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Try to be a little smarter about which kernel are we currently running - */ - -#ifndef __rh_config_h__ -#define __rh_config_h__ - -/* - * First, get the version string for the running kernel from - * /boot/kernel.h - initscripts should create it for us - */ - -#include "/boot/kernel.h" - -#if defined(__BOOT_KERNEL_SMP) && (__BOOT_KERNEL_SMP == 1) -#define __module__smp -#endif /* __BOOT_KERNEL_SMP */ - -#if defined(__BOOT_KERNEL_BOOT) && (__BOOT_KERNEL_BOOT == 1) -#define 
__module__BOOT -#endif /* __BOOT_KERNEL_BOOT */ - -#if defined(__BOOT_KERNEL_BOOTSMP) && (__BOOT_KERNEL_BOOTSMP == 1) -#define __module__BOOTsmp -#endif /* __BOOT_KERNEL_BOOTSMP */ - -#if defined(__BOOT_KERNEL_ENTERPRISE) && (__BOOT_KERNEL_ENTERPRISE == 1) -#define __module__enterprise -#endif /* __BOOT_KERNEL_ENTERPRISE */ - -#if defined(__BOOT_KERNEL_BIGMEM) && (__BOOT_KERNEL_BIGMEM == 1) -#define __module__bigmem -#endif /* __BOOT_KERNEL_BIGMEM */ - -#if defined(__BOOT_KERNEL_DEBUG) && (__BOOT_KERNEL_DEBUG == 1) -#define __module__debug -#endif /* __BOOT_KERNEL_DEBUG */ - -#if !defined(__module__smp) && !defined(__module__BOOT) && !defined(__module__BOOTsmp) && !defined(__module__enterprise) && !defined(__module__bigmem) && !defined(__module__debug) -#define __module__up -#endif /* default (BOOT_KERNEL_UP) */ - -#ifdef __i386__ -# if defined(__MODULE_KERNEL_i586) && (__MODULE_KERNEL_i586 == 1) -# define __module__i586 -# ifdef __module__up -# define __module__i586_up -# endif -# ifdef __module__smp -# define __module__i586_smp -# endif -# ifdef __module__BOOT -# define __module__i586_BOOT -# endif -# ifdef __module__BOOTsmp -# define __module__i586_BOOTsmp -# endif -# ifdef __module__enterprise -# define __module__i586_enterprise -# endif -# ifdef __module__debug -# define __module_i586_debug -# endif -# elif defined(__MODULE_KERNEL_i686) && (__MODULE_KERNEL_i686 == 1) -# define __module__i686 -# ifdef __module__up -# define __module__i686_up -# endif -# ifdef __module__smp -# define __module__i686_smp -# endif -# ifdef __module__BOOT -# define __module__i686_BOOT -# endif -# ifdef __module__BOOTsmp -# define __module__i686_BOOTsmp -# endif -# ifdef __module__enterprise -# define __module__i686_enterprise -# endif -# ifdef __module__bigmem -# define __module__i686_bigmem -# endif -# ifdef __module__debug -# define __module_i686_debug -# endif -# elif defined(__MODULE_KERNEL_athlon) && (__MODULE_KERNEL_athlon == 1) -# define __module__athlon -# ifdef __module__up 
-# define __module__athlon_up -# endif -# ifdef __module__smp -# define __module__athlon_smp -# endif -# ifdef __module__BOOT -# define __module__athlon_BOOT -# endif -# ifdef __module__BOOTsmp -# define __module__athlon_BOOTsmp -# endif -# ifdef __module__enterprise -# define __module__athlon_enterprise -# endif -# ifdef __module__bigmem -# define __module__athlon_bigmem -# endif -# ifdef __module__debug -# define __module__athlon_debug -# endif -# else -# define __module__i386 -# ifdef __module__up -# define __module__i386_up -# endif -# ifdef __module__smp -# define __module__i386_smp -# endif -# ifdef __module__BOOT -# define __module__i386_BOOT -# endif -# ifdef __module__BOOTsmp -# define __module__i386_BOOTsmp -# endif -# ifdef __module__enterprise -# define __module__i386_enterprise -# endif -# ifdef __module__debug -# define __module__i386_debug -# endif -# endif -#endif - -#ifdef __sparc__ -# ifdef __arch64__ -# define __module__sparc64 -# ifdef __module__up -# define __module__sparc64_up -# endif -# ifdef __module__smp -# define __module__sparc64_smp -# endif -# ifdef __module__BOOT -# define __module__sparc64_BOOT -# endif -# ifdef __module__BOOTsmp -# define __module__sparc64_BOOTsmp -# endif -# ifdef __module__enterprise -# define __module__sparc64_enterprise -# endif -# ifdef __module__debug -# define __module__sparc64_debug -# endif -# else -# define __module__sparc -# ifdef __module__up -# define __module__sparc_up -# endif -# ifdef __module__smp -# define __module__sparc_smp -# endif -# ifdef __module__BOOT -# define __module__sparc_BOOT -# endif -# ifdef __module__BOOTsmp -# define __module__sparc_BOOTsmp -# endif -# ifdef __module__enterprise -# define __module__sparc_enterprise -# endif -# ifdef __module__debug -# define __module__sparc_debug -# endif -# endif -#endif - -#ifdef __alpha__ -# define __module__alpha -# ifdef __module__up -# define __module__alpha_up -# endif -# ifdef __module__smp -# define __module__alpha_smp -# endif -# ifdef 
__module__BOOT -# define __module__alpha_BOOT -# endif -# ifdef __module__BOOTsmp -# define __module__alpha_BOOTsmp -# endif -# ifdef __module__enterprise -# define __module__alpha_enterprise -# endif -# ifdef __module__debug -# define __module__alpha_debug -# endif -#endif - -#ifdef __ia64__ -# define __module__ia64 -# ifdef __module__up -# define __module__ia64_up -# endif -# ifdef __module__smp -# define __module__ia64_smp -# endif -# ifdef __module__BOOT -# define __module__ia64_BOOT -# endif -# ifdef __module__BOOTsmp -# define __module__ia64_BOOTsmp -# endif -# ifdef __module__enterprise -# define __module__ia64_enterprise -# endif -# ifdef __module__debug -# define __module__ia64_debug -# endif -#endif - -#if defined(__module__smp) || defined(__module__BOOTsmp) || defined(__module__enterprise) || defined(__module__bigmem) -#define _ver_str(x) smp_ ## x -#else -#define _ver_str(x) x -#endif - -#define RED_HAT_LINUX_KERNEL 1 - -#endif /* __rh_config_h__ */ diff --git a/build/lmake b/build/lmake deleted file mode 100755 index fd99ce89d17adf8afa6fe81fbfc0503b2bddd052..0000000000000000000000000000000000000000 --- a/build/lmake +++ /dev/null @@ -1,834 +0,0 @@ -#!/bin/sh - -# option variables -DESTDIR= -KERNELDIR= -TARGET= -# Not sure what to put here -# TARGET_ARCH=$(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -TARGET_ARCH= -TARGET_CONFIG= -JOBS=1 -CONFIGURE_FLAGS= -TMPDIR=${TMPDIR:-"/var/tmp"} - -# commands to run -BUILD_LUSTRE=0 -BUILD_KERNEL=0 -DEPEND_KERNEL=0 -INSTALL_LUSTRE=0 -INSTALL_KERNEL=0 -SAVE_HEADERS=0 -UNPACK_KERNEL=0 - -# provided by target file -KERNEL= -SERIES= -CONFIG= -VERSION= -EXTRA_VERSION= - -BASE_ARCHS= -BIGMEM_ARCHS= -BOOT_ARCHS= -JENSEN_ARCHS= -SMP_ARCHS= -BIGSMP_ARCHS= -PSERIES64_ARCHS= -UP_ARCHS= - -RHBUILD=0 -SUSEBUILD=0 - -# flat-out globals -TOPDIR= -TARGET_FILE= -KERNEL_FILE= -SERIES_FILE= -CONFIG_FILE= -RPMBUILD= - -canon() -{ - pushd $1 >/dev/null - echo $PWD - popd >/dev/null -} 
-TOPDIR="${0%%${0##*/}}" -if [ "${TOPDIR}" ] ; then - TOPDIR=$(canon "${TOPDIR}/..") -else - TOPDIR=$(canon "..") -fi - -lbuild_topdir() -{ - retdir=$TOPDIR - while [ ! -d $retdir/BUILD ] ; do - retdir=$(canon "$retdir/..") - if [ "$retdir" = "/" ] ; then - break; - fi - done - echo "$retdir" -} - -cleanup() -{ - true -} - -fatal() -{ - cleanup - [ "$2" ] && echo - [ "$2" ] && echo "${0##*/}: $2" - exit $1 -} - -list_targets() -{ - echo -n "Available targets:" - for target in $TOPDIR/lustre/kernel_patches/targets/*.target ; do - target_file=${target##*/} - echo -n " ${target_file%%.target}" - done - echo -} - - -usage() -{ - cat <<EOF -Usage: ${0##*/} [OPTION]... [-- <lustre configure options>] - -Options: - - --build - same as --build-kernel --build-lustre --unpack-kernel - - --build-lustre - configure and compile lustre. Requires that --build-kernel was - already run. - - --build-kernel - configure and compile a kernel. Implies --depend-kernel. - Requires that --unpack-kernel was already run. - - --depend-kernel) - Prepares a kernel tree for building (similar to make mrproper - oldconfig dep). Requires that --unpack-kernel was already run. - - --destdir=DESTDIR - Root directory to install into (like DESTDIR with auto*). - - --extraversion=EXTRAVERSION - Overrides the target kernel\'s EXTRAVERSION text. - - -h, --help - Display this message. - - --install - same as --install-kernel --install-lustre - - --install-lustre - run make install in the Lustre tree. - - --install-kernel - install the kernel image and modules. - - -j jobs - This works just like the -j option to make, and is passed to make - when building. - - --kerneldir=KERNELDIR - Directory containing linux source tarballs. - - --target=TARGET - Name of the configuration to use. The available targets are - listed below. - - --target-arch=ARCH - Specifies an architecture to use when choosing a kernel config - file. Default is i386. 
- - --target-config=CONFIG - Specifies a special option (such as smp, bigsmp, bigmem, or BOOT) - to use when choosing a kernel config file. This also modifies the - kernel version and modules directory. - - --unpack-kernel - Untars and patches the kernel source. - - The order that commands (--build-lustre, --unpack-kernel) are - specified on the command line is ignored; ${0##*/} will always - execute them in the correct order (unpack, then build, then install - etc.). - -EOF - list_targets - - fatal "$1" "$2" -} - -check_options() -{ - (( $BUILD_LUSTRE || $BUILD_KERNEL || $DEPEND_KERNEL || \ - $INSTALL_LUSTRE || $INSTALL_KERNEL || $SAVE_HEADERS || \ - $UNPACK_KERNEL )) || \ - fatal 1 "No commands specified." - - if (( $UNPACK_KERNEL )) ; then - [ "$KERNELDIR" ] || \ - fatal 1 "A kernel directory must be specified with --kerneldir." - [ -d "$KERNELDIR" ] || \ - fatal 1 "$KERNELDIR is not a directory." - fi - - if (( $INSTALL_LUSTRE || $INSTALL_KERNEL || $SAVE_HEADERS )) ; then - [ -z "$DESTDIR" -o -d "$DESTDIR" ] || \ - fatal 1 "$DESTDIR is not a directory." - fi - - [ "$TARGET" ] || usage 1 "A target must be specified with --target." - TARGET_FILE="$TOPDIR/lustre/kernel_patches/targets/$TARGET.target" - [ -r "$TARGET_FILE" ] || \ - fatal 1 "Target '$TARGET' was not found. Try --list-targets." - - if [ -z "$JOBS" -o "$JOBS" -lt "1" ] ; then - JOBS=1 - fi - - RPMBUILD=$(which rpmbuild 2>/dev/null | head -1) - if [ ! "$RPMBUILD" -o "$RPMBUILD" == "" ]; then - RPMBUILD=$(which rpm 2>/dev/null | head -1) - if [ ! "$RPMBUILD" -o "$RPMBUILD" == "" ]; then - usage 1 "Could not find binary for making rpms (tried rpmbuild and rpm)." - fi - fi -} - -get_lustre_version() -{ - for series in $SERIES ; do - SERIES_FILE="$TOPDIR/lustre/kernel_patches/series/$series" - lustre_patch=$(grep lustre_version "$SERIES_FILE" 2>/dev/null) - [ "$lustre_patch" ] && break - done - [ "$lustre_patch" ] || \ - fatal 1 "Could not determine Lustre version from $SERIES series." 
- - awk '/^\+#define LUSTRE_KERNEL_VERSION /{ print $3 }' \ - "$TOPDIR/lustre/kernel_patches/patches/$lustre_patch" 2>/dev/null -} - -load_target() -{ - EXTRA_VERSION_save="$EXTRA_VERSION" - - . "$TARGET_FILE" - - [ "$KERNEL" ] || fatal 1 "Target $TARGET did not specify a kernel." -# Suse 2.6 has our patches in already -# [ "$SERIES" ] || fatal 1 "Target $TARGET did not specify a patch series." -# [ "$CONFIG" ] || fatal 1 "Target $TARGET did not specify a kernel config." - [ "$VERSION" ] || fatal 1 "Target $TARGET did not specify the kernel version." - - if [ "$KERNELDIR" ] ; then - KERNEL_FILE="$KERNELDIR/$KERNEL" - [ -r "$KERNELDIR/$KERNEL" ] || \ - fatal 1 "Target $TARGET's kernel file $KERNEL not found in kernel directory $KERNELDIR." - fi - - if [ "$SERIES" ] ; then - for series in $SERIES ; do - SERIES_FILE="$TOPDIR/lustre/kernel_patches/series/$series" - [ -r "$SERIES_FILE" ] || \ - fatal 1 "Target $TARGET's series $SERIES missing from $TOPDIR/lustre/kernel_patches/series." - done - fi - - TARGET_ARCH=${TARGET_ARCH:-$BASE_ARCHS} - CONFIG_TARGET="$TARGET-${TARGET_ARCH}${TARGET_CONFIG:+-$TARGET_CONFIG}" - CONFIG_FILE="$TOPDIR/lustre/kernel_patches/kernel_configs/kernel-$VERSION-$CONFIG_TARGET.config" - [ -r "$CONFIG_FILE" ] || - fatal 1 "Target $TARGET's config file $CONFIG_FILE missing from $TOPDIR/lustre/kernel_patches/configs." 
- - if [ "$EXTRA_VERSION_save" ] ; then - EXTRA_VERSION="$EXTRA_VERSION_save" - else - EXTRA_VERSION="${EXTRA_VERSION}_lustre.$(get_lustre_version)" - fi -} - -# do these after load_target(), which maybe export CC -setup_ccache_distcc() -{ - # distcc can't handle ".incbin" - if [ "$TARGET" == "2.6-suse" -o "$TARGET" == "2.6-rhel4" ]; then - if [ "$TARGET_ARCH" == "x86_64" ]; then - unset DISTCC - fi - fi - - CC=${CC:-gcc} - if [ "$CCACHE" ]; then - CC="$CCACHE $CC" - [ "$DISTCC" ] && export CCACHE_PREFIX="$DISTCC" - else - [ "$DISTCC" ] && CC="$DISTCC $CC" - fi -} - -tarflags() -{ - case "$1" in - '') - fatal 1 "tarflags(): File name argument missing." - ;; - *.tar.gz | *.tgz) - echo 'zxf' - ;; - *.tar.bz2) - echo 'jxf' - ;; - *.tar) - echo 'xf' - ;; - *) - fatal 1 "tarflags(): Unrecognized tar extension in file: $1" - ;; - esac -} - -untar() -{ - echo "Untarring ${1##*/}..." - tar $(tarflags $1) $1 -} - - -extract_kernel() -{ - (( $UNPACK_KERNEL )) || return 0 - pushd "$TOPDIR" >/dev/null - if [ -d linux ] ; then - [ -L linux ] && rm -rf $(readlink linux) - rm -rf linux - fi - untar "$KERNEL_FILE" - [ -d linux ] || ln -sf linux* linux - popd >/dev/null -} - -patch_kernel() -{ - (( $UNPACK_KERNEL )) || return 0 - [ "$SERIES" ] || return 0 - pushd "$TOPDIR/linux" >/dev/null - for series in $SERIES ; do - echo -n "Applying series $series:" - SERIES_FILE="$TOPDIR/lustre/kernel_patches/series/$series" - for patch in $(<"$SERIES_FILE") ; do - PATCH_FILE="$TOPDIR/lustre/kernel_patches/patches/$patch" - [ -r "$PATCH_FILE" ] || \ - fatal 1 "Patch file not found: $patch" - echo -n " $patch" - patch -s -p1 < "$PATCH_FILE" || fatal 1 "Error applying patch $patch." 
- done - echo - done - popd >/dev/null -} - -set_make() -{ - MAKE="make -s" - if [ "$CC" ] ; then - MAKE_CC="CC=$CC" - fi - if [ "$ARCH" ] ; then - MAKE_ARCH="$MAKE ARCH=$ARCH" - else - case $TARGET_ARCH in - i?86) - ;; - *) - MAKE_ARCH="$MAKE ARCH=$TARGET_ARCH" - ;; - esac - fi - MAKE_J="$MAKE -j $JOBS" -} - -timed_run() { - SLEEP_TIME=$1 - shift - - set -o monitor - - #bash -c "$@" & - ("$@") & - child_pid=$! - - (sleep $SLEEP_TIME - kill -TERM -$child_pid 2>/dev/null - sleep 5 - kill -KILL -$child_pid 2>/dev/null - echo "$1 was killed due to timeout") & - dog_pid=$! - - wait $child_pid - # status will be set to 143 if the process had to be killed due to timeout - status=${PIPESTATUS[0]} - kill -KILL -$dog_pid - return $status -} - -depend_kernel() -{ - (( $DEPEND_KERNEL )) || return 0 - # we need to override $CC at make time, since there is no - # configure - set_make - pushd "$TOPDIR/linux" >/dev/null - echo "Overriding EXTRAVERSION in kernel..." - perl -p -i -e "s/^EXTRAVERSION.*/EXTRAVERSION = -${EXTRA_VERSION}${TARGET_CONFIG}/" Makefile - echo "Making depend in $PWD..." - $MAKE "$MAKE_CC" mrproper || fatal 1 "Error running make mrproper" - rm -f rpm-release - # remove localversion-* files to avoid kernel release string - # srewing up by the top-level Makefile - rm -f localversion-* - cp "$CONFIG_FILE" .config - local UPDATE_OLDCONFIG= - for oc in oldconfig_nonint silentoldconfig oldconfig ; do - if grep -q "$oc" Makefile ; then - timed_run 300 $MAKE "$MAKE_CC" $oc || UPDATE_OLDCONFIG=1 - break - fi - done - - if [ "$UPDATE_OLDCONFIG" ] ; then - # use the expect script to "make oldconfig" and answer the questions for - # new items conservatively. QA will get notified on anything newly added - # for them to review and adjust accordingly. 
- local logfile=$(mktemp /tmp/XXXXXX) - #timed_run 300 $TOPDIR/build/update_oldconfig $logfile - #local RC=${PIPESTATUS[0]} - #local RC=$(strace -f -o update_oldconfig.strace bash -c "$TOPDIR/build/update_oldconfig $logfile; echo \$?") - $TOPDIR/build/update_oldconfig $logfile - local RC=${PIPESTATUS[0]} - #$TOPDIR/build/update_oldconfig $logfile - #local RC=${PIPESTATUS[0]} - if [ $RC -eq 143 ]; then - fatal 1 "update_oldconfig timed out" - elif [ $RC -ne 0 ]; then - # dump the log - cat $logfile - rm -f $logfile - if [ -f update_oldconfig.strace ]; then - cat update_oldconfig.strace - rm -f update_oldconfig.strace - fi - fatal 1 "update_oldconfig failed: $RC. See log above." - fi - rm -f $logfile - # now notify if resulting .config is different than $CONFIG_FILE - local tmpfile=$(mktemp /tmp/XXXXXX) - diff -I '^#.*' -u "$CONFIG_FILE" .config >$tmpfile - if [ -s $tmpfile ]; then - { cat <<EOF -To: qa@lists.clusterfs.com -Subject: kernel_config change - -The result of a make oldconfig on file $CONFIG_FILE resulted in a -difference when compared to .config in the following way: - -EOF - cat $tmpfile - echo -e "\n\nPlease consider updating $CONFIG_FILE." - # not sure these are entirely useful. the above and "patch" are good - #echo -e "\nThe entire new .config file:\n" - #cat .config - } | sendmail -fqa@clusterfs.com -t - fi - rm -f $tmpfile - fi - - case "$VERSION" in - 2.6*) - $MAKE "$MAKE_CC" include/asm - ;; - 2.4*) - $MAKE "$MAKE_CC" symlinks - $MAKE "$MAKE_CC" dep || fatal 1 "Error running make dep" - ;; - esac - $MAKE "$MAKE_CC" include/linux/version.h || fatal 1 "Error making include/linux/version.h" -} - -build_kernel() -{ - (( $BUILD_KERNEL )) || return 0 - set_make - echo "Building kernel in $PWD..." - case "$TARGET_ARCH" in - i386 | i586 | i686 | athlon | x86_64) - $MAKE_J "$MAKE_CC" bzImage || fatal 1 "Error making bzImage." - ;; - ia64 | ppc | ppc64) - $MAKE_J "$MAKE_CC" vmlinux || fatal 1 "Error making vmlinux." 
- ;; - *) - $MAKE_J "$MAKE_CC" boot || fatal 1 "Error making boot." - ;; - esac - $MAKE_J "$MAKE_CC" modules || fatal 1 "Error building modules." - - popd >/dev/null -} - -configure_lustre() -{ - return 0 - (( $BUILD_LUSTRE )) || return 0 - pushd "$TOPDIR" >/dev/null - [ -f Makefile ] && make -s clean - [ -f configure ] || sh ./autogen.sh - ./configure --with-linux=$PWD/linux $CONFIGURE_FLAGS || \ - fatal 1 "Error configuring Lustre." - popd >/dev/null -} - -build_lustre() -{ - (( $BUILD_LUSTRE )) || return 0 - set_make - FULL_VERSION="${VERSION}-${EXTRA_VERSION}${TARGET_CONFIG}" - pushd "$TOPDIR" >/dev/null - sed \ - -e s^@VERSION@^${LUSTRE_VERSION}^g \ - -e s^@LINUXRELEASE@^${FULL_VERSION}^g \ - -e s^@RELEASE@^${FULL_VERSION//-/_}^g \ - -e s^@ac_configure_args@^"--with-linux=${PWD}/linux ${CONFIGURE_FLAGS}"^g \ - < build/lustre.spec.in \ - > build/lustre.spec - $RPMBUILD --target ${TARGET_ARCH} -bb build/lustre.spec \ - --define "_tmppath $TMPDIR" \ - --define "_topdir $(lbuild_topdir)" || \ - fatal 1 "Error building Lustre rpms." - # $MAKE_J "$MAKE_CC" || fatal 1 "Error building Lustre." - popd >/dev/null -} - -install_kernel() -{ - (( $INSTALL_KERNEL )) || return 0 - set_make - FULL_VERSION="${VERSION}-${EXTRA_VERSION}${TARGET_CONFIG}" - pushd "$TOPDIR/linux" >/dev/null - mkdir -p "$DESTDIR/boot" - - install -m 644 System.map "$DESTDIR/boot/System.map-${FULL_VERSION}" - # install -m 644 module-info ... - install -m 644 "$CONFIG_FILE" "$DESTDIR/boot/config-${FULL_VERSION}" - - mkdir -p "$DESTDIR/dev/shm" - mkdir -p "$DESTDIR/lib/modules/${FULL_VERSION}" - - $MAKE "$MAKE_CC" INSTALL_MOD_PATH="$DESTDIR" KERNELRELEASE="$FULL_VERSION" \ - -s modules_install || \ - fatal 1 "Error installing modules." 
- - case "$TARGET_ARCH" in - i386 | i586 | i686 | athlon) - cp arch/i386/boot/bzImage "$DESTDIR/boot/vmlinuz-${FULL_VERSION}" - cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/" - ln -sf "../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/vmlinux-${FULL_VERSION}" - ;; - x86_64) - cp arch/x86_64/boot/bzImage "$DESTDIR/boot/vmlinuz-${FULL_VERSION}" - cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/" - ln -sf "../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/vmlinux-${FULL_VERSION}" - ;; - ppc | ppc64) - cp vmlinux "$DESTDIR/boot/vmlinux-${FULL_VERSION}" - ln -sf "$DESTDIR/boot/vmlinux-${FULL_VERSION}" "../lib/modules/${FULL_VERSION}/vmlinux" - ;; - ia64) - gzip -cfv vmlinux > vmlinuz - mkdir -p "$DESTDIR/boot/efi/redhat" - install -m 755 vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/" - install -m 755 vmlinuz "$DESTDIR/boot/efi/redhat/vmlinuz-${FULL_VERSION}" - ln -sf "../../../lib/modules/${FULL_VERSION}/vmlinux" "$DESTDIR/boot/efi/redhat/vmlinux-${FULL_VERSION}" - ln -sf "efi/redhat/vmlinux-${FULL_VERSION}" "$DESTDIR/boot/vmlinux-${FULL_VERSION}" - ln -sf "efi/redhat/vmlinuz-${FULL_VERSION}" "$DESTDIR/boot/vmlinuz-${FULL_VERSION}" - ;; - *) - cp vmlinuz "$DESTDIR/boot/vmlinuz-${FULL_VERSION}" - cp vmlinux "$DESTDIR/lib/modules/${FULL_VERSION}/vmlinux-${FULL_VERSION}" - ln -sf "../lib/modules/${FULL_VERSION}/vmlinux-${FULL_VERSION}" "$DESTDIR/boot/vmlinux-${FULL_VERSION}" - - ;; - esac - if [ -e init/kerntypes.o ] ; then - cp init/kerntypes.o "$DESTDIR/boot/Kerntypes-${FULL_VERSION}" - fi - - popd >/dev/null -} - -cleanup_libmodules() -{ - (( $INSTALL_LUSTRE )) || return 0 - - FULL_VERSION="${VERSION}-${EXTRA_VERSION}${TARGET_CONFIG}" - KVERREL="${VERSION}-${EXTRA_VERSION}" - i="$DESTDIR/lib/modules/${FULL_VERSION}" - - rm -f $i/build - rm -f $i/source - - if (( $LINUX26 )) ; then - ln -sf ../../../usr/src/linux-${KVERREL}-obj/${TARGET_ARCH}/${TARGET_CONFIG} $i/build - ln -sf ../../../usr/src/linux-${KVERREL} $i/source - else - ln -sf 
../../../usr/src/linux-${KVERREL} $i/build - fi -} - -install_lustre() -{ - (( $INSTALL_LUSTRE )) || return 0 - return 0 - set_make - FULL_VERSION="${VERSION}-${EXTRA_VERSION}${TARGET_CONFIG}" - pushd "$TOPDIR" >/dev/null - $MAKE "$MAKE_CC" -s install "DESTDIR=$DESTDIR" KERNELRELEASE="$FULL_VERSION" || fatal 1 "Error installing Lustre." - popd >/dev/null -} - -build_kms() -{ - (( $BUILD_KERNEL )) || return 0 - (( $SUSEBUILD )) || return 0 - set_make - FULL_VERSION="${VERSION}-${EXTRA_VERSION}${TARGET_CONFIG}" - mkdir -p "${TOPDIR}/modules-${FULL_VERSION}" - for dir in /usr/src/kernel-modules/* ; do - # we are replacing lustre-lite, so don't include it - if [ "${dir##*/}" != "lustre-lite" -a -e $dir/Makefile ]; then - build_dir="${TOPDIR}/modules-${FULL_VERSION}/${dir##*/}" - cp -a $dir $build_dir - # these modules are terrible, and don't all build - $MAKE_J "$MAKE_CC" -C $build_dir modules KERNEL_SOURCE="${TOPDIR}/linux" - fi - done -} - -symver() -{ - local file=$1 name=${1%.ko} - nm $file \ - | sed -ne 's,^0*\([0-9a-f]\{8\}\) A __crc_\(.*\),0x\1\t\2\t'"$name"',p' -} - -install_kms() -{ - (( $INSTALL_KERNEL )) || return 0 - (( $LINUX26 )) || return 0 - set_make - FULL_VERSION="${VERSION}-${EXTRA_VERSION}${TARGET_CONFIG}" - for build_dir in "${TOPDIR}/modules-${FULL_VERSION}/*" ; do - [ -d $build_dir ] || continue - # these modules are terrible, and don't all build - $MAKE "$MAKE_CC" -C $build_dir KERNEL_SOURCE="${TOPDIR}/linux" INSTALL_MOD_PATH="$DESTDIR" - done - ( symver vmlinux - moddir="${DESTDIR}/lib/modules/${FULL_VERSION}" - cd $moddir/kernel - for module in $(find * -name '*.ko'); do - symver $module - done - cd $moddir - for module in $(find * -path 'kernel/*' -prune -o \ - -name '*.ko' -print); do - symver $module - done - ) | sort -u -k2 \ - | gzip -c9 > "${DESTDIR}/boot/symvers-${VERSION}-${EXTRA_VERSION}-${TARGET_ARCH}${TARGET_CONFIG}.gz" -} - -save_headers() -{ - (( $SAVE_HEADERS )) || return 0 - - echo "Saving headers for ${TARGET_CONFIG:-up} 
${TARGET_ARCH}..." - pushd linux >/dev/null - - KVERREL="${VERSION}-${EXTRA_VERSION}" - # deal with the kernel headers that are version specific - - saveddir="$RPM_BUILD_ROOT/usr/src/linux-${KVERREL}/savedheaders/${TARGET_ARCH}/${TARGET_CONFIG:-up}" - mkdir -p "$saveddir" - install -m 644 include/linux/autoconf.h "$saveddir/autoconf.h" - install -m 644 include/linux/version.h "$saveddir/version.h" - mv include/linux/modules "$saveddir/" - echo ${TARGET_ARCH} ${TARGET_CONFIG} ../../savedheaders/${TARGET_ARCH}/${TARGET_CONFIG:-up}/ \ - >> "$RPM_BUILD_ROOT/usr/src/linux-${KVERREL}/savedheaders/list" - popd >/dev/null -} - -save_all_headers() -{ - (( $SAVE_HEADERS )) || return 0 - - for arch in $BIGMEM_ARCHS ; do - save_headers bigmem $arch - done - - for arch in $BOOT_ARCHS ; do - save_headers BOOT $arch - done - - for arch in $JENSEN_ARCHS ; do - save_headers jensen $arch - done - - for arch in $SMP_ARCHS ; do - save_headers smp $arch - done - - for arch in $BIGSMP_ARCHS ; do - save_headers bigsmp $arch - done - for arch in $PSERIES64_ARCHS ; do - save_headers pseries64 $arch - done - for arch in $UP_ARCHS ; do - save_headers up $arch - done -} - -longopts="build,build-lustre,build-kernel,depend-kernel,destdir:,extraversion:" -longopts="$longopts,help,install,install-lustre,install-kernel,kerneldir:" -longopts="$longopts,save-headers,target:,target-arch:,target-config:,unpack-kernel" - -options=$(getopt -o hj: -l "$longopts" -- "$@") - -eval set -- "$options" - -while [ "$1" ] ; do - case "$1" in - '') - usage 1 - ;; - --build) - BUILD_LUSTRE=1 - BUILD_KERNEL=1 - DEPEND_KERNEL=1 - UNPACK_KERNEL=1 - shift - ;; - --build-lustre) - BUILD_LUSTRE=1 - shift - ;; - --build-kernel) - BUILD_KERNEL=1 - DEPEND_KERNEL=1 - shift - ;; - --depend-kernel) - DEPEND_KERNEL=1 - shift - ;; - --destdir) - DESTDIR=$2 - shift 2 - ;; - --extraversion) - EXTRA_VERSION=$2 - shift 2 - ;; - --help | -h) - usage 0 - ;; - --install) - INSTALL_LUSTRE=1 - INSTALL_KERNEL=1 - shift - ;; - 
--install-lustre) - INSTALL_LUSTRE=1 - shift - ;; - --install-kernel) - INSTALL_KERNEL=1 - shift - ;; - -j) - JOBS=$2 - shift 2 - ;; - --kerneldir) - KERNELDIR=$2 - shift 2 - ;; - --save-headers) - SAVE_HEADERS=1 - shift - ;; - --target) - TARGET=$2 - shift 2 - ;; - --target-arch) - TARGET_ARCH=$2 - shift 2 - ;; - --target-config) - TARGET_CONFIG=$2 - shift 2 - ;; - --unpack-kernel) - UNPACK_KERNEL=1 - shift - ;; - --) - shift - CONFIGURE_FLAGS=$@ - break - ;; - *) - usage 1 "Unrecognized option: $1" - ;; - esac -done - -check_options -load_target -setup_ccache_distcc - -extract_kernel -patch_kernel - -depend_kernel -build_kernel - -configure_lustre -build_lustre - -build_kms - -install_kernel -install_lustre - -install_kms - -cleanup_libmodules - -save_headers - -exit 0 diff --git a/build/lustre-kernel-2.4.spec.in b/build/lustre-kernel-2.4.spec.in deleted file mode 100644 index c610623689c23dc4cb2ffd62003469fa1dd310b8..0000000000000000000000000000000000000000 --- a/build/lustre-kernel-2.4.spec.in +++ /dev/null @@ -1,966 +0,0 @@ -Summary: The Linux kernel (the core of the Linux operating system) - -# Versions of various parts - -# -# Polite request for people who spin their own kernel rpms: -# please modify the "release" field in a way that identifies -# that the kernel isn't the stock RHL kernel, for example by -# adding some text to the end of the version number. 
-# -%define kversion @KERNEL_VERSION@ -%define kextraver @KERNEL_EXTRA_VERSION@ -%define release @KERNEL_RELEASE@ -# /usr/src/%{kslnk} -> /usr/src/linux-%{KVERREL} -%define kslnk linux-2.4 - -# groups of related archs -%define all_x86 i386 i686 i586 athlon -#define all_x86 i686 i386 i586 athlon - -%define nptlarchs %{all_x86} -#define nptlarchs noarch -%define rhbuild @RHBUILD@ -%define susebuild @SUSEBUILD@ -%define linux26 @LINUX26@ - -# disable build root strip policy -%define __spec_install_post /usr/lib/rpm/brp-compress || : -# -# RPM foo magic -%define _missing_doc_files_terminate_build 0 -%define _unpackaged_files_terminate_build 0 -%define debug_package %{nil} - -# Enable this to build a board-specific kernel configuration -# some architectures have LOTS of different setups and this -# is a way to deal with that cleanly. -# -#define targetboard assabet -%define dashtargetboard %{?targetboard:-%{targetboard}} -%define withtargetboard 0 -%{?targetboard: %{expand: %%define withtargetboard 1}} - -# Override generic defaults with per-arch defaults (which can -# themselves be overridden with --with/--without). These must -# ONLY be "0", never "1" - -%define buildbase 0 -%define buildbigmem 0 -%define buildBOOT 0 -%define buildjensen 0 -%define buildsmp 0 -%define buildbigsmp 0 -%define buildpseries64 0 -%define buildup 0 -%define buildsrc 0 - -%ifarch @BASE_ARCHS@ -%define buildbase 1 -%endif - -%ifarch @BIGMEM_ARCHS@ -%define buildbigmem 1 -%endif - -%ifarch @BOOT_ARCHS@ -%define buildBOOT 1 -%endif - -%ifarch @JENSEN_ARCHS@ -%define buildjensen 1 -%endif - -%ifarch @SMP_ARCHS@ -%define buildsmp 1 -%endif - -%ifarch @BIGSMP_ARCHS@ -%define buildbigsmp 1 -%endif - -%ifarch @PSERIES64_ARCHS@ -%define buildpseries64 1 -%endif - -%ifarch @UP_ARCHS@ -%define buildup 1 -%endif - -# For board-specific kernels, build only the normal kernel (which may actually be smp, not up). 
-%if %{withtargetboard} -%define buildsmp 0 -%define buildbigsmp 0 -%define buildBOOT 0 -%define buildbigmem 0 -%define buildpseries64 0 -%define buildjensen 0 -%endif - -%if 0 -Second, per-architecture exclusions (ifarch) -%ifarch i386 -%define buildsmp 0 -%endif -%ifarch ia64 -%define buildBOOT 0 -%endif -%endif - -# we can't test values inline, only whether a macro exists -%{expand: %%define buildup_%{buildup} yadda} -%{expand: %%define buildsmp_%{buildsmp} yadda} -%{expand: %%define buildbigsmp_%{buildbigsmp} yadda} -%{expand: %%define buildpseries64_%{buildpseries64} yadda} -%{expand: %%define buildBOOT_%{buildBOOT} yadda} -%{expand: %%define buildbigmem_%{buildbigmem} yadda} -%{expand: %%define buildjensen_%{buildjensen} yadda} -%{expand: %%define ikd_%{ikd} yadda} -%{expand: %%define ibcs_%{ibcs} yadda} -%{expand: %%define debuglevel_%{debugging} yadda} - -%{expand: %%define kernel_conflicts ppp <= 2.3.15, pcmcia-cs <= 3.1.20, isdn4k-utils <= 3.0, mount < 2.10r-5, nfs-utils < 0.3.1, cipe < 1.4.5, tux < 2.1.0, kudzu <= 0.92, e2fsprogs < 1.22, initscripts < 5.84, dev < 3.2-7, iptables < 1.2.5-3, bcm5820 < 1.81, nvidia-rh72 <= 1.0, oprofile < 0.4} - -%if %{rhbuild} -%define BOOT_kernel_prereq fileutils, modutils >= 2.4.18 -%define kernel_prereq %{BOOT_kernel_prereq}, initscripts >= 5.83, mkinitrd >= 3.2.6 -%endif - -%ifarch ia64 -%define initrd_dir /boot/efi/redhat -%else -%define initrd_dir /boot -%endif - -%ifarch %{all_x86} x86_64 -%define kernel_glob vmlinu?-%{KVERREL} -%endif -%ifarch ia64 -# <sigh>, no GLOB_BRACE for filelists, efi needs to be done separately -%define kernel_glob vmlinuz-%{KVERREL} -%endif -%ifarch alpha -%define kernel_glob vmlinu?-%{KVERREL} -%endif -%ifarch ppc ppc64 -%define kernel_glob vmlinu?-%{KVERREL} -%endif - -Name: kernel-lustre -Version: %{kversion} -Release: %{release}%{?targetboard:%{targetboard}}%{?debuglevel_1:.dbg} -%define KVERREL %{PACKAGE_VERSION}-%{kextraver}%{?targetboard:%{targetboard}}%{?debuglevel_1:.dbg} 
-License: GPL -Group: System Environment/Kernel -ExclusiveArch: %{all_x86} x86_64 ia64 ppc -ExclusiveOS: Linux -Obsoletes: kernel-modules, kernel-sparc -Provides: kernel = %{version} -BuildConflicts: rhbuildsys(DiscFree) < 500Mb -%ifarch %{all_x86} ia64 x86_64 -Provides: kernel-drm = 4.1.0, kernel-drm = 4.2.0, kernel-drm = 4.3.0, kernel-drm = 4.2.99.3 -%endif -Autoreqprov: no -%if %{rhbuild} -Prereq: %{kernel_prereq} -Conflicts: %{kernel_conflicts} - -BuildPreReq: patch >= 2.5.4, bash >= 2.03, sh-utils, gnupg, tar -BuildPreReq: bzip2, findutils, dev, gzip, m4 -%endif - -Vendor: Cluster File Systems, Inc. -URL: http://www.kernel.org/ -Buildroot: %{_tmppath}/%{name}-%{version}-root - -Source0: @LUSTRE_SOURCE@ -Source1: @KERNEL_SOURCE@ -Source2: external-patches.tar.gz - -Source15: linux-rhconfig.h -Source16: linux-merge-config.awk -Source17: linux-merge-modules.awk - -Source25: suse-functions.sh -Source26: suse-post.sh -Source27: suse-postun.sh -Source28: suse-trigger-script.sh.in -Source29: sles8-post.sh -Source30: sles8-postun.sh -Source31: sles8-pre.sh -Source32: sles8-update_INITRD_MODULES.sh -Source33: sles8-update_rcfile_setting.sh - -%package source -Summary: The source code for the Linux kernel. -Group: Development/System -Prereq: fileutils -Requires: gawk -Requires: gcc >= 2.96-98 -Autoreqprov: 0 - -%package doc -Summary: Various documentation bits found in the kernel source. -Group: Documentation - -%description -The kernel package contains the Linux kernel (vmlinuz), the core of a -Linux operating system. The kernel handles the basic functions of the -operating system: memory allocation, process allocation, device input -and output, etc. - -%description source -The kernel-source package contains the source code files for the Linux -kernel. These source files are needed to build custom/third party device -drivers. 
The source files can also be used to build a custom kernel that is -better tuned to your particular hardware, if you are so inclined (and you -know what you're doing). - -%description doc -This package contains documentation files form the kernel -source. Various bits of information about the Linux kernel and the -device drivers shipped with it are documented in these files. - -You'll want to install this package if you need a reference to the -options that can be passed to Linux kernel modules at load time. - -%package smp -Summary: The Linux kernel compiled for SMP machines. -Group: System Environment/Kernel -Provides: module-info, kernel = %{version} -%ifarch %{all_x86} ia64 x86_64 -Provides: kernel-drm = 4.1.0, kernel-drm = 4.2.0, kernel-drm = 4.3.0, kernel-drm = 4.2.99.3 -%endif -%if %{rhbuild} -Prereq: %{kernel_prereq} -Conflicts: %{kernel_conflicts} -%endif - -%description smp -This package includes a SMP version of the Linux kernel. It is -required only on machines with two or more CPUs, although it should -work fine on single-CPU boxes. - -Install the kernel-smp package if your machine uses two or more CPUs. - -%package bigsmp -Summary: The Linux kernel compiled for SMP machines. -Group: System/Kernel -Provides: module-info, kernel = %{version}, k_smp4G -Obsoletes: k_smp4G -%ifarch %{all_x86} ia64 x86_64 -Provides: kernel-drm = 4.1.0, kernel-drm = 4.2.0, kernel-drm = 4.3.0, kernel-drm = 4.2.99.3 -%endif -%if %{rhbuild} -Prereq: %{kernel_prereq} -Conflicts: %{kernel_conflicts} -%endif - -%description bigsmp -This package includes a SMP version of the Linux kernel. It is -required only on machines with two or more CPUs, although it should -work fine on single-CPU boxes. - -Install the kernel-bigsmp package if your machine uses two or more CPUs. 
- -%package pseries64 -Summary: Standard Kernel for 64-bit Power based SMP and LPAR Machines -Group: System/Kernel -Provides: module-info, kernel = %{version}, k_smp4G -%ifarch %{all_x86} ia64 x86_64 -Provides: kernel-drm = 4.1.0, kernel-drm = 4.2.0, kernel-drm = 4.3.0, kernel-drm = 4.2.99.3 -%endif -%if %{rhbuild} -Prereq: %{kernel_prereq} -Conflicts: %{kernel_conflicts} -%endif - -%description pseries64 -The standard kernel for Power3, Power4 and PowerPC 970 64-bit SMP -machines. - -This kernel can be used for all 64bit RS/6000, pSeries and JS20 -machines. - -%package bigmem -Summary: The Linux Kernel for machines with more than 4 Gigabyte of memory. -Group: System Environment/Kernel -Provides: module-info, kernel = %{version} -%ifarch %{all_x86} ia64 x86_64 -Provides: kernel-drm = 4.1.0, kernel-drm = 4.2.0, kernel-drm = 4.3.0, kernel-drm = 4.2.99.3 -%endif -%if %{rhbuild} -Prereq: %{kernel_prereq} -Conflicts: %{kernel_conflicts} -Obsoletes: kernel-enterprise <= 2.4.10 -%endif - -%description bigmem -This package includes a kernel that has appropriate configuration options -enabled for Pentium III machines with 4 Gigabyte of memory or more. - -%package BOOT -Summary: The version of the Linux kernel used on installation boot disks. -Group: System Environment/Kernel -Provides: kernel = %{version} -%if %{rhbuild} -Prereq: %{BOOT_kernel_prereq} -Conflicts: %{kernel_conflicts} -%endif - -%description BOOT -This package includes a trimmed down version of the Linux kernel. -This kernel is used on the installation boot disks only and should not -be used for an installed system, as many features in this kernel are -turned off because of the size constraints. - -%package BOOTsmp -Summary: The Linux kernel used on installation boot disks for SMP machines. 
-Group: System Environment/Kernel -Provides: kernel = %{version} -%if %{rhbuild} -Prereq: %{BOOT_kernel_prereq} -Conflicts: %{kernel_conflicts} -%endif - -%description BOOTsmp -This package includes a trimmed down version of the Linux kernel. This -kernel is used on the installation boot disks only and should not be used -for an installed system, as many features in this kernel are turned off -because of the size constraints. This kernel is used when booting SMP -machines that have trouble coming up to life with the uniprocessor kernel. - -%package jensen -Summary: The Linux Kernel compiled for the Alpha Jensen platform. -Group: System Environment/Kernel -Provides: kernel = %{version} -%if %{rhbuild} -Prereq: %{kernel_prereq} -Conflicts: %{kernel_conflicts} -%endif - -%description jensen -This package includes a kernel that has appropriate configuration -options enabled for use on the Alpha Jensen platform. The Jensen -platform is not supported in the normal generic alpha kernel support. - -%package -n lustre-lite-utils -Summary: Lustre utils for Linux -Group: Applications/System - -%description -n lustre-lite-utils -The Lustre Lite file system utilities. This includes the tools needed -to configure, mount, and administer a Lustre filesystem. This package -is necessary if you want to access a Lustre filesystem. - -# the lustre-doc files are just included as %doc with -# lustre-lite-utils - -#%package -n lustre-doc -#Summary: Sample Lustre configurations and documentation -#Group: Documentation - -#%description -n lustre-doc -#The Lustre book, sample configurations, and other documentation for -#Lustre. - -%prep -%setup -n lustre-kernel-%{version} -q -c -if [ ! 
-d lustre ] ; then - ln -sf lustre* lustre -fi -pushd lustre >/dev/null -if [ -s "%{SOURCE2}" ] ; then - tar zxf "%{SOURCE2}" -C lustre/kernel_patches -fi -sh -x ./build/lmake \ - --unpack-kernel \ - --target @LUSTRE_TARGET@ \ - --target-arch %{_target_cpu} \ - --kerneldir $RPM_SOURCE_DIR -popd >/dev/null - -# handle both SuSE and Red Hat's new-kernel-pkg bits -for flavor in "" smp bigmem bigsmp pseries64 BOOT jensen ; do - for when in pre preun post postun ; do - script="${when}${flavor}.sh" - cat %{SOURCE25} %{SOURCE32} %{SOURCE33} > ${script} - echo "if [ -d /etc/susehelp.d ] ; then" >> ${script} - sed -e "s/@when@/$when/g" -e "s^%ver_str^%{KVERREL}${flavor}^g" %{SOURCE28} >> ${script} - case $when in - pre) - echo "if [ ! -f /etc/modprobe.conf ] ; then" >> ${script} - cat %{SOURCE31} >> ${script} - echo "fi" >> ${script} - ;; - post) - # /sbin/update-modules.dep compares when the modules were built, rather - # than installed, so force modules.dep to be recreated - echo "rm -f /lib/modules/%{KVERREL}${flavor}/modules.dep" >> ${script} - echo "if [ -f /etc/modprobe.conf ] ; then" >> ${script} - sed -e "s^%ver_str^%{KVERREL}${flavor}^g" %{SOURCE26} >> ${script} - - echo "else" >> ${script} - sed -e "s^%ver_str^%{KVERREL}${flavor}^g" -e "s^%%{cfg_name}^${flavor}^g" %{SOURCE29} >> ${script} - echo "fi" >> ${script} - ;; - postun) - echo "if [ -f /etc/modprobe.conf ] ; then" >> ${script} - sed -e "s^%ver_str^%{KVERREL}${flavor}^g" %{SOURCE27} >> ${script} - - echo "else" >> ${script} - sed -e "s^%ver_str^%{KVERREL}${flavor}^g" %{SOURCE30} >> ${script} - echo "fi" >> ${script} - ;; - esac - echo "exit 0; fi" >> ${script} - case $when in - post) - if [ -z "${flavor}" ] ; then - cat >> ${script} <<EOF -cd /boot -%ifnarch ia64 -ln -sf vmlinuz-%{KVERREL} vmlinuz -%endif -ln -sf System.map-%{KVERREL} System.map -ln -sf module-info-%{KVERREL} module-info -EOF - fi - cat >> ${script} <<EOF -[ -x /usr/sbin/module_upgrade ] && /usr/sbin/module_upgrade -[ -x /sbin/mkkerneldoth 
] && /sbin/mkkerneldoth -if [ -x /sbin/new-kernel-pkg ] ; then - if /sbin/new-kernel-pkg 2>&1 | grep package >/dev/null ; then - PACKAGE_ARG="--package kernel-${flavor}" - fi - /sbin/new-kernel-pkg $PACKAGE_ARG --mkinitrd --depmod \ - --install %{KVERREL}${flavor} -fi -EOF - ;; - postun) - ;; - pre) - cat >> ${script} <<EOF -/sbin/modprobe loop 2>/dev/null >/dev/null || : -exit 0 -EOF - ;; - preun) - cat >> ${script} <<EOF -/sbin/modprobe loop 2> /dev/null > /dev/null || : -rm -f /lib/modules/%{KVERREL}${flavor}/modules.* -if [ -x /sbin/new-kernel-pkg ] ; then - /sbin/new-kernel-pkg --rminitrd --rmmoddep --remove %{KVERREL}${flavor} -fi -EOF - ;; - esac - done - - cat > "kernel$flavor.files" <<EOF -%%defattr(-, root, root) -/boot/%{kernel_glob}${flavor} -/boot/System.map-%{KVERREL}${flavor} -/boot/config-%{KVERREL}${flavor} -%%dir /lib/modules -%%dir /dev/shm -/lib/modules/%{KVERREL}${flavor} -%ifarch ia64 -/boot/efi/redhat/%{kernel_glob}${flavor} -%endif -%if %{linux26} -%if %{susebuild} -/boot/Kerntypes-%{KVERREL}${flavor} -%endif -/boot/symvers-%{KVERREL}-%{_target_cpu}${flavor}.gz -%endif -EOF - -done - -cat > kernel-source.files <<EOF -%%defattr(-,root,root) -%%dir /usr/src/linux-%{KVERREL} -/usr/src/linux-%{KVERREL}/* -%if %{linux26} -%%dir /usr/src/linux-%{KVERREL}-obj -/usr/src/linux-%{KVERREL}-obj/* -%endif -EOF - -%build -# if RPM_BUILD_NCPUS unset, set it -if [ -z "$RPM_BUILD_NCPUS" ] ; then - RPM_BUILD_NCPUS=$(egrep -c "^cpu[0-9]+" /proc/stat || :) - if [ $RPM_BUILD_NCPUS -eq 0 ] ; then - RPM_BUILD_NCPUS=1 - fi - if [ $RPM_BUILD_NCPUS -gt 8 ] ; then - RPM_BUILD_NCPUS=8 - fi -fi - -rm -rf $RPM_BUILD_ROOT -mkdir -p $RPM_BUILD_ROOT - -DependKernel() -{ - target_config=${1:+--target-config $1} - sh -x ./build/lmake \ - --depend-kernel \ - --target @LUSTRE_TARGET@ \ - --target-arch %{_target_cpu} \ - ${target_config} \ - --extraversion %{kextraver} \ - -j $RPM_BUILD_NCPUS -} - -BuildKernel() -{ - target_config=${1:+--target-config $1} - sh -x ./build/lmake \ 
- --build-kernel --build-lustre \ - --install \ - --save-headers \ - --target @LUSTRE_TARGET@ \ - --target-arch %{_target_cpu} \ - ${target_config} \ - --extraversion %{kextraver} \ - --kerneldir $RPM_SOURCE_DIR \ - -j $RPM_BUILD_NCPUS \ - --destdir $RPM_BUILD_ROOT \ - -- --disable-tests \ - @CONFIGURE_FLAGS@ -} - -BuildLustre() -{ - target_config=${1:+--target-config $1} - sh -x ./build/lmake \ - --build-lustre \ - --install-lustre \ - --target @LUSTRE_TARGET@ \ - --target-arch %{_target_cpu} \ - ${target_config} \ - --extraversion %{kextraver} \ - --kerneldir $RPM_SOURCE_DIR \ - -j $RPM_BUILD_NCPUS \ - --destdir $RPM_BUILD_ROOT \ - -- --enable-utils \ - --disable-doc --disable-tests \ - --disable-modules --disable-liblustre \ - --sysconfdir=%{_sysconfdir} \ - --mandir=%{_mandir} \ - @CONFIGURE_FLAGS@ -} - -SaveHeaders() -{ - sh -x ./build/lmake \ - --save-headers \ - --target @LUSTRE_TARGET@ \ - --target-arch %{_target_cpu} \ - --extraversion %{kextraver} \ - --destdir $RPM_BUILD_ROOT -} - -pushd lustre >/dev/null - -%if %{buildbigmem} -BuildKernel bigmem -%endif - -%if %{buildBOOT} -BuildKernel BOOT -%endif - -%if %{buildjensen} -BuildKernel jensen -%endif - -%if %{buildsmp} -BuildKernel smp -%endif - -%if %{buildbigsmp} -BuildKernel bigsmp -%endif - -%if %{buildpseries64} -BuildKernel pseries64 -%endif - -%if %{buildup} -BuildKernel -%endif - -%if %{buildbase} -# BuildLustre -%endif - -popd >/dev/null - -%install -pushd lustre >/dev/null -# it's already installed, so just clean up some things that are rpm -# specific -for i in $RPM_BUILD_ROOT/lib/modules/* ; do - rm -f $i/modules.* -%ifarch %{ntplarchs} - # remove legacy pcmcia symlink that's no longer useful - rm -rf $i/pcmcia -%endif -done - -# mark the vmlinux* non-executable to fool strip-to-file -chmod a-x $RPM_BUILD_ROOT/boot/vmlinux* - -BuildObj () -{ - flavor=$1 - if [ $flavor = "up" ] ; then - flavext="" - flavtgt="" - else - flavext="-$flavor" - flavtgt="$flavor" - fi - perl -p -i -e 
"s/^EXTRAVERSION.*/EXTRAVERSION = -%{kextraver}${flavtgt}/" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile - c="$RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}" - o="${c}-obj/%{_target_cpu}/$flavor" - mkdir -p $o - # use the one we just built - #cp ../lustre/kernel_patches/kernel_configs/kernel-%{kversion}-@LUSTRE_TARGET@-%{_target_cpu}%{dashtargetboard}${flavext}.config \ - cp .config $o/.config - for oc in oldconfig_nonint silentoldconfig oldconfig ; do - if grep -q "$oc" Makefile ; then - OLDCONFIG="$oc" - break - fi - done - MAKE="make -s O=$o -C ${c}" - if [ "$CC" ] ; then - MAKE_CC="CC=$CC" - fi - $MAKE "$MAKE_CC" $OLDCONFIG - $MAKE "$MAKE_CC" prepare-all - $MAKE clean - rm -rf $o/.config.old $o/include/config - # Replace the Makefile in the object directory with a version - # that has relative path names. - read VERSION PATCHLEVEL SUBLEVEL <<-EOF -$(set -- 2.6.5 ; echo ${*//./ }) -EOF - source scripts/mkmakefile \ - ../../../linux-%{KVERREL} \ - ../linux-%{KVERREL}-obj/%{_target_cpu}/$flavor \ - $VERSION \ - $PATCHLEVEL \ - > $o/Makefile - zcat "$RPM_BUILD_ROOT/boot/symvers-%{KVERREL}-%{_target_cpu}${flavtgt}.gz" \ - > $o/Module.symvers -} - -## -## do -source package cleanup/install -## -if [ "%{buildbase}" -ne 0 ] ; then - pushd linux >/dev/null - mkdir -p $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} - rm -f drivers/net/hamradio/soundmodem/gentbl scripts/mkdep - tar cf - . 
| tar xf - -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} - ln -sf linux-%{KVERREL} $RPM_BUILD_ROOT/usr/src/linux - # install -m 644 %{SOURCE10} $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} - - #clean up the destination - make -s mrproper -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} - rm -rf $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs - mkdir -p $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs - cp ../lustre/kernel_patches/kernel_configs/kernel-%{kversion}-@LUSTRE_TARGET@*.config $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/configs - if [ "%{linux26}" -ne 0 ] ; then - # this only works because CFS only builds one kernel per target/arch per kernel-source rpm - objdir=$RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}-obj - mkdir -p $objdir - if [ "%{buildbigmem}" -ne 0 ] ; then - BuildObj bigmem - fi - if [ "%{buildBOOT}" -ne 0 ] ; then - BuildObj BOOT - fi - if [ "%{buildjensen}" -ne 0 ] ; then - BuildObj jensen - fi - if [ "%{buildsmp}" -ne 0 ] ; then - BuildObj smp - fi - if [ "%{buildbigsmp}" -ne 0 ] ; then - BuildObj bigsmp - fi - if [ "%{buildpseries64}" -ne 0 ] ; then - BuildObj pseries64 - fi - if [ "%{buildup}" -ne 0 ] ; then - BuildObj up - fi - perl -p -i -e "s/^EXTRAVERSION.*/EXTRAVERSION = -%{kextraver}custom/" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile - # Remove $RPM_BUILD_ROOT prefix from symlinks. 
- for link in $(find $objdir -type l); do - target=$(readlink $link) - rm -f $link - ln -s ${target/$RPM_BUILD_ROOT/} $link - done - else # 2.4 rh-style - perl -p -i -e "s/^EXTRAVERSION.*/EXTRAVERSION = -%{kextraver}custom/" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile - # get the one from the build we just completed as it might have picked - # up new options - #cp ../lustre/kernel_patches/kernel_configs/kernel-%{kversion}-@LUSTRE_TARGET@-%{_target_cpu}%{dashtargetboard}.config $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/.config - cp .config $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/.config - for oc in oldconfig_nonint silentoldconfig oldconfig ; do - if grep -q "$oc" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/Makefile ; then - OLDCONFIG="$oc" - break - fi - done - if [ "$CC" ] ; then - MAKE_CC="CC=$CC" - fi - make "$MAKE_CC" -s $OLDCONFIG -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} - make "$MAKE_CC" -s symlinks -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} - make "$MAKE_CC" -s include/linux/version.h -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} - - #this generates modversions info which we want to include and we may as - #well include the depends stuff as well, after we fix the paths - make -s depend -C $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} - find $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL} -name ".*depend" | \ - while read file ; do - mv $file $file.old - sed -e "s|[^ ]*\(/usr/src/linux\)|\1|g" < $file.old > $file - rm -f $file.old - done - - # Try to put some smarter autoconf.h and version.h files in place - pushd $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/include/linux ; { - rm -rf modules modversions.h autoconf.h version.h - cat > modversions.h <<EOF -#ifndef _LINUX_MODVERSIONS_H -#define _LINUX_MODVERSIONS_H -#include <linux/rhconfig.h> -#include <linux/modsetver.h> -EOF - echo '#include <linux/rhconfig.h>' > autoconf.h - list=`find ../../savedheaders/* -name '*.ver' -exec basename '{}' \; | sort` - mkdir modules - for l in $list; do - sed 's,$,modules/'$l, 
../../savedheaders/list | awk -f %{SOURCE17} > modules/$l - touch -r modules/$l modules/`basename $l .ver`.stamp - echo '#include <linux/modules/'$l'>' >> modversions.h - done - echo '#endif' >> modversions.h - sed 's,$,autoconf.h,' ../../savedheaders/list | awk -f %{SOURCE16} >> autoconf.h - install -m 644 %{SOURCE15} rhconfig.h - echo "#include <linux/rhconfig.h>" >> version.h - keyword=if - for i in smp BOOT BOOTsmp bigmem bigsmp pseries64 up ; do - # When we build in an i386, we don't have an bigmem header directory - # in savedheaders/i386/bigmem. We also don't have a BOOT directory - # anywhere except in savedheaders/i386. So, we need to use this method - # of determining if a kernel version string needs to be included in the - # version.h file - verh=`echo ../../savedheaders/*/$i/version.h | awk ' { print $1 } '` - if [ -n "$verh" -a -f "$verh" ]; then - if [ "$i" = up ]; then - if [ "$keyword" = if ]; then - echo "#if 0" >> version.h - fi - echo "#else" >> version.h - else - echo "#$keyword defined(__module__$i)" >> version.h - keyword=elif - fi - grep UTS_RELEASE $verh >> version.h - fi - done - echo "#endif" >> version.h - if [ -f ../../savedheaders/%{_target_cpu}/up/version.h ] ; then - # keep to a standard normally - HEADER_FILE=../../savedheaders/%{_target_cpu}/up/version.h - else - # test build not including uniprocessor, must get info from somewhere - HEADER_FILE=$(ls ../../savedheaders/*/*/version.h | head -n 1) - fi - grep -v UTS_RELEASE $HEADER_FILE >> version.h - rm -rf ../../savedheaders - } ; popd - touch $RPM_BUILD_ROOT/boot/kernel.h-%{kversion} - - # rm -f $RPM_BUILD_ROOT/usr/include/linux - - rm -rf $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/savedheaders - - if [ "%{rhbuild}" -ne 0 ] ; then - # fix up the tmp_include_depends file wrt the buildroot - perl -p -i -e "s|$RPM_BUILD_ROOT||g" $RPM_BUILD_ROOT/usr/src/linux-%{KVERREL}/tmp_include_depends - fi - fi # linux26 - popd >/dev/null -fi # buildbase - -popd >/dev/null - -%clean -rm -rf 
$RPM_BUILD_ROOT - -### -### scripts -### - -# do this for upgrades...in case the old modules get removed we have -# loopback in the kernel so that mkinitrd will work. -%pre -f pre.sh - -%pre smp -f presmp.sh - -%pre bigsmp -f prebigsmp.sh - -%pre pseries64 -f prepseries64.sh - -%pre bigmem -f prebigmem.sh - -%post -f post.sh - -%post pseries64 -f postpseries64.sh - -%post smp -f postsmp.sh - -%post bigsmp -f postbigsmp.sh - -%post bigmem -f postbigmem.sh - -%post jensen -f postjensen.sh - -%ifnarch ia64 -%post BOOT -f postBOOT.sh - -%endif - -%post -n lustre-lite-utils -if [ -f /etc/init.d/lustre ] ; then - /sbin/chkconfig --add lustre - /sbin/chkconfig --add lustrefs -fi - -# Allow clean removal of modules directory -%preun -f preun.sh - -%preun pseries64 -f preunpseries64.sh - -%preun smp -f preunsmp.sh - -%preun bigsmp -f preunbigsmp.sh - -%preun bigmem -f preunbigmem.sh - -%preun BOOT -f preunBOOT.sh - -%preun jensen -f preunjensen.sh - -# suse needs these i guess -%postun -f postun.sh - -%postun pseries64 -f postunpseries64.sh - -%postun smp -f postunsmp.sh - -%postun bigsmp -f postunbigsmp.sh - -%postun bigmem -f postunbigmem.sh - -%postun BOOT -f postunBOOT.sh - -%postun jensen -f postunjensen.sh - -# We need this here because we don't prereq kudzu; it could be -# installed after the kernel -%triggerin -- kudzu -[ -x /usr/sbin/module_upgrade ] && /usr/sbin/module_upgrade || : - -%triggerin smp -- kudzu -[ -x /usr/sbin/module_upgrade ] && /usr/sbin/module_upgrade || : - -%triggerin bigsmp -- kudzu -[ -x /usr/sbin/module_upgrade ] && /usr/sbin/module_upgrade || : - -%triggerin pseries64 -- kudzu -[ -x /usr/sbin/module_upgrade ] && /usr/sbin/module_upgrade || : - -%triggerin bigmem -- kudzu -[ -x /usr/sbin/module_upgrade ] && /usr/sbin/module_upgrade || : - -%triggerin BOOT -- kudzu -[ -x /usr/sbin/module_upgrade ] && /usr/sbin/module_upgrade || : - -%triggerin jensen -- kudzu -[ -x /usr/sbin/module_upgrade ] && /usr/sbin/module_upgrade || : - - -# Old 
kernel-headers packages owned include symlinks; new -# ones just make them so that we can have multiple kernel-headers -# packages installed. - -%triggerpostun source -- kernel-headers < 2.2.16 -cd /usr/src -rm -f %{kslnk} -ln -snf linux-%{KVERREL} %{kslnk} -exit 0 - -%post source -cd /usr/src -rm -f %{kslnk} -ln -snf linux-%{KVERREL} %{kslnk} - -%postun source -if [ -L /usr/src/%{kslnk} ]; then - if [ -L /usr/src/%{kslnk} -a `ls -ld /usr/src/%{kslnk} 2>/dev/null| awk '{ print $11 }'` = "linux-%{KVERREL}" ]; then - [ $1 = 0 ] && rm -f /usr/src/%{kslnk} - fi -fi -exit 0 - -%preun -n lustre-lite-utils -if [ $1 = 0 -a -f /etc/init.d/lustre ]; then - /sbin/chkconfig --del lustre - /sbin/chkconfig --del lustrefs -fi - -### -### file lists -### - -%if %{buildup} -%files -f kernel.files -%endif - -%if %{buildsmp} -%files smp -f kernelsmp.files -%endif - -%if %{buildbigsmp} -%files bigsmp -f kernelbigsmp.files -%endif - -%if %{buildpseries64} -%files pseries64 -f kernelpseries64.files -%endif - -%if %{buildbigmem} -%files bigmem -f kernelbigmem.files -%endif - -%if %{buildBOOT} -%files BOOT -f kernelBOOT.files -%endif - -%if %{buildbase} -%files source -f kernel-source.files -%endif diff --git a/build/lustre.spec.in b/build/lustre.spec.in deleted file mode 100644 index 4a9ef8eb02848e2324e86c7a1378052bb626f2d6..0000000000000000000000000000000000000000 --- a/build/lustre.spec.in +++ /dev/null @@ -1,199 +0,0 @@ -# lustre.spec -%define version @VERSION@ -%define kversion @LINUXRELEASE@ - -Summary: Lustre File System -Name: lustre -Version: %{version} -Release: @RELEASE@ -License: GPL -Group: Utilities/System -Source: lustre-%{version}.tar.gz -URL: http://clusterfs.com/ -BuildRoot: %{_tmppath}/lustre-%{version}-root -Obsoletes: lustre-lite, lustre-lite-utils, lustre-ldap nfs-utils-lustre -Provides: lustre-lite = %{version}, lustre-lite-utils = %{version} -# GSS requires this: BuildRequires: pkgconfig, libgssapi-devel >= 0.10 - -%description -Userspace tools and files for the 
Lustre file system. - -%package modules -Summary: Kernel Lustre modules for Linux %{kversion} -Requires: modutils >= 2.4.10 -Group: Development/Kernel - -%description modules -Lustre file system, server and network drivers for Linux %{kversion}. - -%package source -Summary: Object-Based Disk storage driver source -Group: Development/Kernel - -%description source -Lustre sources for further development - -# Since the RPMs we ship are to be used on both SLES and RHEL, we -# can't include any dependency information (since the package names -# are different on the two platforms). -# -# Instead, we can build these empty meta-packages that only include -# dependency information. These let people get the correct -# dependencies for their platform and lets them use tools like yum and -# red carpet to install the correct files. -# -# Unfortunately I have not seen this come up on the lists much, so I -# have disabled them (by commenting out their empty files section -# below) until it's clear that they resolve more confusion than they -# add. - -%package deps-sles -Summary: Lustre dependencies meta-package for SLES -Group: Utilities/System -Provides: lustre-deps = %{version} -Requires: lustre = %{version}, sles-release -Conflicts: lustre-deps-rhel - -%description deps-sles -This package has RPM dependencies appropriate for SLES systems. - -%package deps-rhel -Summary: Lustre dependencies meta-package for RHEL -Group: Utilities/System -Provides: lustre-deps = %{version} -Requires: lustre = %{version}, redhat-release -Conflicts: lustre-deps-sles - -%description deps-rhel -This package has RPM dependencies appropriate for RHEL, RHL, and FC -systems. 
- -%prep -%setup -qn lustre-%{version} - -%build -# if RPM_BUILD_NCPUS unset, set it from the per-CPU "cpuN" lines in -# /proc/stat, clamped to the range 1..8 -if [ -z "$RPM_BUILD_NCPUS" ] ; then - # egrep -c prints the count itself (including 0); on error the value is - # left empty and the default below turns it into 1 - RPM_BUILD_NCPUS=$(egrep -c "^cpu[0-9]+" /proc/stat 2>/dev/null || :) - if [ "${RPM_BUILD_NCPUS:-0}" -eq 0 ] ; then - RPM_BUILD_NCPUS=1 - fi - if [ "$RPM_BUILD_NCPUS" -gt 8 ] ; then - RPM_BUILD_NCPUS=8 - fi -fi - -rm -rf $RPM_BUILD_ROOT - -# Set an explicit path to our Linux tree, if we can. -# Extra flags supplied through the optional configure_flags macro are -# passed through to configure. -cd $RPM_BUILD_DIR/lustre-%{version} -./configure @ac_configure_args@ %{?configure_flags} \ - --sysconfdir=%{_sysconfdir} \ - --mandir=%{_mandir} \ - --libdir=%{_libdir} -make -j $RPM_BUILD_NCPUS -s - -%install -make install DESTDIR=$RPM_BUILD_ROOT -# hack to avoid changing the libsysio code for "make install" -rm -f $RPM_BUILD_ROOT%{_libdir}/libsysio.a - -# Create the pristine source directory. -cd $RPM_BUILD_DIR/lustre-%{version} -mkdir -p $RPM_BUILD_ROOT/usr/src -rm -f lustre-source -ln -s $RPM_BUILD_ROOT/usr/src lustre-source -make distdir distdir=lustre-source/lustre-%{version} - -cat >lustre.files <<EOF -%attr(-, root, root) /sbin/mount.lustre -%attr(-, root, root) /usr/sbin/* -%attr(-, root, root) /usr/bin/* - -%attr(-, root, root) /usr/share/lustre/* - -%attr(-, root, root) %{_libdir}/libptlctl.a -%attr(-, root, root) %{_libdir}/liblustreapi.a -%attr(-, root, root) /usr/include/lustre - -%attr(-, root, root) %{_mandir}/man?/* - -%attr(-, root, root) %{_libdir}/lustre/lc_common -EOF - -if [ -f $RPM_BUILD_ROOT%{_libdir}/liblustre.so ] ; then - echo '%attr(-, root, root) %{_libdir}/liblustre.a' >>lustre.files - echo '%attr(-, root, root) %{_libdir}/liblustre.so' >>lustre.files -fi - -if [ -f $RPM_BUILD_DIR/lustre-%{version}/lustre/utils/libiam.c ] ; then - echo '%attr(-, root, root) %{_libdir}/libiam.a' >>lustre.files -fi - -if [ -d $RPM_BUILD_ROOT%{_libdir}/lustre/snmp ] ; then - echo '%attr(-, root, root) %{_libdir}/lustre/snmp' >>lustre.files - echo '%attr(-, root, root) %{_datadir}/lustre/snmp/mibs' >>lustre.files -fi - -# Have
universal lustre headers -if [ -f $RPM_BUILD_DIR/lustre-%{version}/lustre/include/lustre/lustre_idl.h ] ; then - echo '%attr(-, root, root) /usr/include/lustre/lustre_idl.h' >>lustre.files - echo '%attr(-, root, root) /usr/include/linux/lustre_types.h' >>lustre.files - echo '%attr(-, root, root) /usr/include/linux/lustre_user.h' >>lustre.files -else - echo '%attr(-, root, root) /usr/include/linux/lustre_idl.h' >>lustre.files -fi - -%files -f lustre.files - -%files modules -%attr(-, root, root) %doc COPYING -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre - -%files source -%attr(-, root, root) /usr/src/lustre-%{version} - -# uncomment these lines to enable deps packages -# %files deps-sles -# %files deps-rhel - -%post modules -if [ -f /boot/System.map-%{kversion} ]; then - depmod -ae -F /boot/System.map-%{kversion} %{kversion} || exit 0 -else - depmod -ae %{kversion} || exit 0 -fi - -# for update from < v1.4.6 - -for f in /etc/modules.conf /etc/modprobe.conf /etc/modprobe.conf.local ; -do - if [ -f $f ]; then - if grep 'lustre llite' $f >/dev/null 2>/dev/null ; then - [ ! -f $f.rpmsave ] && cp $f $f.rpmsave - TMPFILE=`mktemp $f.XXXXXX` && \ - rm -f $TMPFILE && touch $TMPFILE && \ - grep -v 'lustre llite' $f >> $TMPFILE && \ - mv $TMPFILE $f - fi - if egrep "^[^#]*(add below|install) ptlrpc" $f ; then - [ ! 
-f $f.rpmsave ] && cp $f $f.rpmsave - TMPFILE=`mktemp $f.XXXXXX` && \ - rm -f $TMPFILE && touch $TMPFILE && \ - sed -e "s/^[^#]*\(add below\|install\) ptlrpc.*/#&/" $f >> $TMPFILE && \ - mv $TMPFILE $f - fi - fi -done - - -%postun modules -if [ -f /boot/System.map-%{kversion} ]; then - depmod -ae -F /boot/System.map-%{kversion} %{kversion} || exit 0 -else - depmod -ae %{kversion} || exit 0 -fi - -%clean -rm -rf $RPM_BUILD_ROOT diff --git a/build/osxpack/License.txt b/build/osxpack/License.txt deleted file mode 100644 index eb250a6c632bdb84ff3de013c6d0b9d281811ae9..0000000000000000000000000000000000000000 --- a/build/osxpack/License.txt +++ /dev/null @@ -1,8 +0,0 @@ -Lustre(R) for OS X - -This software is a proprietary product of Cluster File Systems, Inc. which owns all intellectual property rights to this software. - -You must obtain a license from Cluster File Systems and agree to its terms before using this software. Contact sales@clusterfs.com - -This software is beta software modifying and running kernel code. Back up all systems before use. This software is provided to you without any warranty, without even the implied warranty of merchantability, or fitness for a particular purpose. Consult your license agreement for more details. - diff --git a/build/replace1.sh b/build/replace1.sh deleted file mode 100755 index cb0c6889dabf29e058422c81cae088d93ad61519..0000000000000000000000000000000000000000 --- a/build/replace1.sh +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/bash -e - -progname=${0##*/} - -CONFLICTS=cvs-merge-conflicts -CVS=cvs - -if [ -f .mergeinfo ] ; then - echo ".mergeinfo exists - clean up first" - exit -fi - -if [ -f $CONFLICTS ] ; then - echo "$CONFLICTS exists - clean up first" - exit -fi - -if [ $# -lt 2 -o $# -gt 3 ]; then - echo "This is phase 1 of replacing branches. 
Usage: $0 parent(old) child(new) [dir]" - exit -fi - -parent=$1 -PARENT=`echo $parent | sed -e "s/^b_//" | tr "[a-z]" "[A-Z]"` -child=$2 -CHILD=`echo $child | sed -e "s/^b_//" | tr "[a-z]" "[A-Z]"` -date=`date +%Y%m%d_%H%M` - -dir=${3:-.} -module=$(basename $(<$dir/CVS/Repository)) - -if [ "$module" = "lustre" ] ; then - echo >&2 "${progname}: You probably want to land lustre or lnet, not the whole tree." - echo >&2 "${progname}: Try using ${0} $parent $child lustre" - exit 1 -fi - -case $parent in - HEAD) : ;; - b_*|b[1-4]*) : ;; - *) parent="b_$parent" ;; -esac -case $child in - HEAD) : ;; - b_*|b[1-4]*) : ;; - *) child="b_$child" -esac - -if [ "$parent" != "HEAD" -a "`cat $dir/CVS/Tag 2> /dev/null`" != "T$parent" ]; then - echo "${progname}: this script must be run within the $parent branch" - exit 1 -fi - -TEST_FILE=${TEST_FILE:-ChangeLog} # does this need to be smarter? -# check_tag TAG: return if TAG is HEAD or is listed in the "cvs log" output -# of $TEST_FILE; otherwise report the missing tag and exit 2. Exits 3 when -# called without an argument. -check_tag() { - [ -z "$1" ] && echo "check_tag() missing arg" && exit 3 - [ "$1" = "HEAD" ] && return - $CVS log ${dir%%/*}/$TEST_FILE 2> /dev/null | grep -q " $1: " && return - echo "${progname}: tag $1 not found in $dir/$TEST_FILE" - exit 2 -} - -check_tag $child -check_tag ${CHILD}_BASE - -cat << EOF > ".mergeinfo" -parent=$parent -PARENT=$PARENT -child=$child -CHILD=$CHILD -date=$date -module=$module -dir=$dir -CONFLICTS=$CONFLICTS -OPERATION=Replace -OPERWHERE=onto -EOF - -echo PARENT $PARENT parent $parent CHILD $CHILD child $child date $date - -# Update your tree to the PARENT branch; HEAD is not really a branch, so you -# need to update -A instead of update -r HEAD, or the commit will fail. -p -echo -n "Updating to $parent ...." -if [ $parent == "HEAD" ]; then - $CVS update -AdP $dir -else - $CVS update -r $parent -dP $dir -fi -echo "done" - -# Tag parent before merge -echo -n "Create land-to point on $parent as ${PARENT}_${CHILD}_REPLACE_PARENT_$date ..."
-$CVS rtag -r $parent ${PARENT}_${CHILD}_REPLACE_PARENT_$date $module -echo "done" - -# Tag child before merge -echo -n "Create land-from point on ${child} ${PARENT}_${CHILD}_REPLACE_CHILD_$date ..." -$CVS rtag -r ${child} ${PARENT}_${CHILD}_REPLACE_CHILD_$date $module -echo "done" - -# In case someone tries to re-land later -echo -n "Preserve old base tag on $parent ${CHILD}_BASE as ${CHILD}_BASE_PREV ..." -$CVS rtag -F -r ${CHILD}_BASE ${CHILD}_BASE_PREV $module -echo "done" - -# Apply all of the changes to your local tree: -echo -n "Updating as -j $parent -j $child ..." -$CVS update -j $parent -j $child $dir -echo "done" - -echo -n "Recording conflicts in $CONFLICTS ..." -$CVS update $dir | awk '/^C/ { print $2 }' > $CONFLICTS -if [ -s $CONFLICTS ] ; then - echo "Conflicts found, fix before committing." - cat $CONFLICTS -fi -echo "done" - -echo -n "Verifying that there are no diffs from $child ..." -$CVS diff --brief -r $child $dir >> $CONFLICTS -if [ -s $CONFLICTS ] ; then - echo "Danger! The child branch $CHILD differs from the updated branch $dir" - cat $CONFLICTS -else - echo "No conflicts found" - rm -f $CONFLICTS -fi -echo "done" - -echo "Build, test, commit and then run replace2.sh (no arguments)" diff --git a/build/suse-post.sh b/build/suse-post.sh deleted file mode 100644 index 869fa409cea490ed518b984baeb750c3aa579e00..0000000000000000000000000000000000000000 --- a/build/suse-post.sh +++ /dev/null @@ -1,58 +0,0 @@ -if [ -f /boot/vmlinuz-%ver_str ]; then - image=vmlinuz -elif [ -f /boot/image-%ver_str ]; then - image=image -elif [ -f /boot/vmlinux-%ver_str ]; then - image=vmlinux -else - # nothing to do (UML kernels for example). 
- exit 0 -fi - -case %ver_str in - (*xen*|*um*) - NOBOOTSPLASH="-s off" - SHORTNM=%ver_str - SHORTNM=-${SHORTNM##*-} - ;; - (*) - unset NOBOOTSPLASH - unset SHORTNM - ;; -esac - -# If we have old symlinks, rename them to *.previous -if [ -L /boot/$image$SHORTNM -a \ - "$(readlink /boot/$image$SHORTNM)" != $image-%ver_str ]; then - mv /boot/$image$SHORTNM /boot/$image$SHORTNM.previous -fi -relink $image-%ver_str /boot/$image$SHORTNM - -if test "$YAST_IS_RUNNING" != instsys ; then - if [ -f /etc/fstab ]; then - echo Setting up /lib/modules/%ver_str - /sbin/update-modules.dep -v %ver_str - cd /boot - /sbin/mkinitrd -k $image-%ver_str -i initrd-%ver_str $NOBOOTSPLASH - - # Inspect the same (possibly suffixed) symlink that is tested and - # renamed here, mirroring the image symlink handling above. - if [ -L /boot/initrd$SHORTNM -a \ - "$(readlink /boot/initrd$SHORTNM)" != initrd-%ver_str ]; then - mv /boot/initrd$SHORTNM /boot/initrd$SHORTNM.previous - fi - if [ -e /boot/initrd-%ver_str ]; then - relink initrd-%ver_str /boot/initrd$SHORTNM - else - rm -f /boot/initrd$SHORTNM - fi - else - echo "please run mkinitrd as soon as your system is complete" - fi -fi - -if [ "$YAST_IS_RUNNING" != instsys -a -x /sbin/new-kernel-pkg ]; then - # Notify boot loader that a new kernel image has been installed. - # (during initial installation the boot loader configuration does not - # yet exist when the kernel is installed, but yast kicks the boot - # loader itself later.) - /sbin/new-kernel-pkg %ver_str -fi diff --git a/build/suse-postun.sh b/build/suse-postun.sh deleted file mode 100644 index 44c87986513fac3dc59d6a367d89f883a8458fec..0000000000000000000000000000000000000000 --- a/build/suse-postun.sh +++ /dev/null @@ -1,60 +0,0 @@ -if [ -L /boot/vmlinux ]; then - image=vmlinux -elif [ -L /boot/vmlinuz ]; then - image=vmlinuz -elif [ -L /boot/image ]; then - image=image -else - # nothing to do (UML kernels for example).
- exit 0 -fi - -case %ver_str in - (*xen*|*um*) - SHORTNM=%ver_str - SHORTNM=-${SHORTNM##*-} - ;; - (*) - unset SHORTNM - ;; -esac - -if [ "$(readlink /boot/$image$SHORTNM)" = $image-%ver_str ]; then - # This may be the last kernel RPM on the system, or it may - # be an update. In both of those cases the symlinks will - # eventually be correct. Only if this kernel - # is removed and other kernel rpms remain installed, - # find the most recent of the remaining kernels, and make - # the symlinks point to it. This makes sure that the boot - # manager will always have a kernel to boot in its default - # configuration. - shopt -s nullglob - for img in $(cd /boot ; ls -dt $image-*$SHORTNM); do - initrd=initrd-${img#*-} - if [ -f /boot/$img -a -f /boot/$initrd ]; then - relink $img /boot/${img%%%%-*}$SHORTNM - relink $initrd /boot/${initrd%%%%-*}$SHORTNM - - # Notify the boot loader that a new kernel image is active. - if [ -x /sbin/new-kernel-pkg ]; then - /sbin/new-kernel-pkg $(/sbin/get_kernel_version /boot/$img) - fi - break - fi - done - shopt -u nullglob -fi - -# Check whether there is a .previous link to the image we're about -# to remove or to the image we point the new symlink to (so .previous -# would be identical to the current symlink) -case "$(readlink /boot/$image$SHORTNM.previous)" in -$image-%ver_str|$(readlink /boot/$image$SHORTNM)) - rm -f /boot/$image$SHORTNM.previous ;; -esac -case "$(readlink /boot/initrd$SHORTNM.previous)" in -initrd-%ver_str|$(readlink /boot/initrd$SHORTNM)) - rm -f /boot/initrd$SHORTNM.previous ;; -esac -# created in %post -rm -f /boot/initrd-%ver_str diff --git a/configure.ac b/configure.ac deleted file mode 100644 index 889fe34acc9df99ca96225bf7c8981a769d9820a..0000000000000000000000000000000000000000 --- a/configure.ac +++ /dev/null @@ -1,16 +0,0 @@ - -AC_INIT([Lustre], [LUSTRE_VERSION], [https://bugzilla.lustre.org/], [lustre]) -sinclude(lustre/autoconf/lustre-version.ac) - -LB_CHECK_VERSION - -LC_CONFIG_SRCDIR - 
-AC_CANONICAL_SYSTEM - -AM_INIT_AUTOMAKE -# AM_MAINTAINER_MODE - -AC_PROG_CC - -LB_CONFIGURE diff --git a/libsysio/.cvsignore b/libsysio/.cvsignore deleted file mode 100644 index 1136bf9bf51769aa725c2ff85fe710af922d6e49..0000000000000000000000000000000000000000 --- a/libsysio/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -aclocal.m4 -autom4te.cache -config.log -config.status -configure -lib -Makefile -Makefile.in -INSTALL diff --git a/libsysio/AUTHORS b/libsysio/AUTHORS deleted file mode 100644 index 0dcc9cd26b2af0ff5c194135f24164dde3ff33e9..0000000000000000000000000000000000000000 --- a/libsysio/AUTHORS +++ /dev/null @@ -1,6 +0,0 @@ -Lee Ward <lee@sandia.gov> - -Various folks at: - -Cluster File Systems Incorporated. (www.clusterfs.com) -Cray Incorporated (www.cray.com) diff --git a/libsysio/COPYING b/libsysio/COPYING deleted file mode 100644 index 2bb5b6e5194c010ca3fad047f089ee1a605f96d2..0000000000000000000000000000000000000000 --- a/libsysio/COPYING +++ /dev/null @@ -1,502 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 - - Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts - as the successor of the GNU Library Public License, version 2, hence - the version number 2.1.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Lesser General Public License, applies to some -specially designated software packages--typically libraries--of the -Free Software Foundation and other authors who decide to use it. 
You -can use it too, but we suggest you first think carefully about whether -this license or the ordinary General Public License is the better -strategy to use in any particular case, based on the explanations below. - - When we speak of free software, we are referring to freedom of use, -not price. Our General Public Licenses are designed to make sure that -you have the freedom to distribute copies of free software (and charge -for this service if you wish); that you receive source code or can get -it if you want it; that you can change the software and use pieces of -it in new free programs; and that you are informed that you can do -these things. - - To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. These restrictions translate to certain responsibilities for -you if you distribute copies of the library or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link other code with the library, you must provide -complete object files to the recipients, so that they can relink them -with the library after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. - - To protect each distributor, we want to make it very clear that -there is no warranty for the free library. Also, if the library is -modified by someone else and passed on, the recipients should know -that what they have is not the original version, so that the original -author's reputation will not be affected by problems that might be -introduced by others. 
- - Finally, software patents pose a constant threat to the existence of -any free program. We wish to make sure that a company cannot -effectively restrict the users of a free program by obtaining a -restrictive license from a patent holder. Therefore, we insist that -any patent license obtained for a version of the library must be -consistent with the full freedom of use specified in this license. - - Most GNU software, including some libraries, is covered by the -ordinary GNU General Public License. This license, the GNU Lesser -General Public License, applies to certain designated libraries, and -is quite different from the ordinary General Public License. We use -this license for certain libraries in order to permit linking those -libraries into non-free programs. - - When a program is linked with a library, whether statically or using -a shared library, the combination of the two is legally speaking a -combined work, a derivative of the original library. The ordinary -General Public License therefore permits such linking only if the -entire combination fits its criteria of freedom. The Lesser General -Public License permits more lax criteria for linking other code with -the library. - - We call this license the "Lesser" General Public License because it -does Less to protect the user's freedom than the ordinary General -Public License. It also provides other free software developers Less -of an advantage over competing non-free programs. These disadvantages -are the reason we use the ordinary General Public License for many -libraries. However, the Lesser license provides advantages in certain -special circumstances. - - For example, on rare occasions, there may be a special need to -encourage the widest possible use of a certain library, so that it becomes -a de-facto standard. To achieve this, non-free programs must be -allowed to use the library. A more frequent case is that a free -library does the same job as widely used non-free libraries. 
In this -case, there is little to gain by limiting the free library to free -software only, so we use the Lesser General Public License. - - In other cases, permission to use a particular library in non-free -programs enables a greater number of people to use a large body of -free software. For example, permission to use the GNU C Library in -non-free programs enables many more people to use the whole GNU -operating system, as well as its variant, the GNU/Linux operating -system. - - Although the Lesser General Public License is Less protective of the -users' freedom, it does ensure that the user of a program that is -linked with the Library has the freedom and the wherewithal to run -that program using a modified version of the Library. - - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, whereas the latter must -be combined with the library in order to run. - - GNU LESSER GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library or other -program which contains a notice placed by the copyright holder or -other authorized party saying it may be distributed under the terms of -this Lesser General Public License (also called "this License"). -Each licensee is addressed as "you". - - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. 
A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. - - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. 
You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. - - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. 
But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. - - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. 
You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. -Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. 
- - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also combine or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. - - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. 
(It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (1) uses at run time a - copy of the library already present on the user's computer system, - rather than copying library functions into the executable, and (2) - will operate properly with a modified version of the library, if - the user installs one, as long as the modified version is - interface-compatible with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. - - d) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - e) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. However, as a special exception, -the materials to be distributed need not include anything that is -normally distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. 
You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. 
Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties with -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. 
Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Lesser General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. 
Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. 
It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - <one line to give the library's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - <signature of Ty Coon>, 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! diff --git a/libsysio/ChangeLog b/libsysio/ChangeLog deleted file mode 100644 index 126c27f6fa231958aa36466e61066b4a785aa7c7..0000000000000000000000000000000000000000 --- a/libsysio/ChangeLog +++ /dev/null @@ -1,31 +0,0 @@ -Sat Feb 22 10:32:10 EST 2003 - Created <lee@sandia.gov> ---- - -*Added mount() api call to support sub-mounts. 
- -*Added rudimentary automounts per the namespace chapter in the "Lustre -Architecture Reference". Note, full URI support is not implemented. See -the README for details. - -Think I have it going for simultaneous 32/64 bit support. Together with -the nagging build for test_stat. - -*Miscellaneous bugs fixed. - ---- -Lee -- Sat Mar 22 15:01:45 EST 2003 - -*Added "incore" file system. An in-memory file system solving boot-strap -and other annoying little chicken-and-the-egg problems. - -*Added support for devices - -*Added support for accessing the pre-opened standard file descriptors 0, 1, -and 2 via the stdfd device driver (major number 0, minor 0, 1, and 2). - ---- -Lee -- Mon Jan 26 11:26:14 EST 2004 - -*Altered the internal interface to pass the xtvec (see .../include/xtio.h) in -order to support strided-io. diff --git a/libsysio/INSTALL b/libsysio/INSTALL deleted file mode 100644 index 54caf7c190f28df0279ade5fee3d43b108480a14..0000000000000000000000000000000000000000 --- a/libsysio/INSTALL +++ /dev/null @@ -1,229 +0,0 @@ -Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software -Foundation, Inc. - - This file is free documentation; the Free Software Foundation gives -unlimited permission to copy, distribute and modify it. - -Basic Installation -================== - - These are generic installation instructions. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, and a -file `config.log' containing compiler output (useful mainly for -debugging `configure'). 
- - It can also use an optional file (typically called `config.cache' -and enabled with `--cache-file=config.cache' or simply `-C') that saves -the results of its tests to speed up reconfiguring. (Caching is -disabled by default to prevent problems with accidental use of stale -cache files.) - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If you are using the cache, and at -some point `config.cache' contains results you don't want to keep, you -may remove or edit it. - - The file `configure.ac' (or `configure.in') is used to create -`configure' by a program called `autoconf'. You only need -`configure.ac' if you want to change it or regenerate `configure' using -a newer version of `autoconf'. - -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. If you're - using `csh' on an old version of System V, you might need to type - `sh ./configure' instead to prevent `csh' from trying to execute - `configure' itself. - - Running `configure' takes awhile. While running, it prints some - messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. 
If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - -Compilers and Options -===================== - - Some systems require unusual options for compilation or linking that -the `configure' script does not know about. Run `./configure --help' -for details on some of the pertinent environment variables. - - You can give `configure' initial values for configuration parameters -by setting variables in the command line or in the environment. Here -is an example: - - ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix - - *Note Defining Variables::, for more details. - -Compiling For Multiple Architectures -==================================== - - You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you must use a version of `make' that -supports the `VPATH' variable, such as GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - If you have to use a `make' that does not support the `VPATH' -variable, you have to compile the package for one architecture at a -time in the source code directory. After you have installed the -package for one architecture, use `make distclean' before reconfiguring -for another architecture. - -Installation Names -================== - - By default, `make install' will install the package's files in -`/usr/local/bin', `/usr/local/man', etc. You can specify an -installation prefix other than `/usr/local' by giving `configure' the -option `--prefix=PATH'. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. 
If you -give `configure' the option `--exec-prefix=PATH', the package will use -PATH as the prefix for installing programs and libraries. -Documentation and other data files will still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=PATH' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. - -Optional Features -================= - - Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - -Specifying the System Type -========================== - - There may be some features `configure' cannot figure out -automatically, but needs to determine by the type of machine the package -will run on. Usually, assuming the package is built to be run on the -_same_ architectures, `configure' can figure that out, but if it prints -a message saying it cannot guess the machine type, give it the -`--build=TYPE' option. 
TYPE can either be a short name for the system -type, such as `sun4', or a canonical name which has the form: - - CPU-COMPANY-SYSTEM - -where SYSTEM can have one of these forms: - - OS KERNEL-OS - - See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the machine type. - - If you are _building_ compiler tools for cross-compiling, you should -use the `--target=TYPE' option to select the type of system they will -produce code for. - - If you want to _use_ a cross compiler, that generates code for a -platform different from the build platform, you should specify the -"host" platform (i.e., that on which the generated programs will -eventually be run) with `--host=TYPE'. - -Sharing Defaults -================ - - If you want to set default values for `configure' scripts to share, -you can create a site shell script called `config.site' that gives -default values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - -Defining Variables -================== - - Variables not defined in a site shell script can be set in the -environment passed to `configure'. However, some packages may run -configure again during the build, and the customized values of these -variables may be lost. In order to avoid this problem, you should set -them in the `configure' command line, using `VAR=value'. For example: - - ./configure CC=/usr/local2/bin/gcc - -will cause the specified gcc to be used as the C compiler (unless it is -overridden in the site shell script). - -`configure' Invocation -====================== - - `configure' recognizes the following options to control how it -operates. 
- -`--help' -`-h' - Print a summary of the options to `configure', and exit. - -`--version' -`-V' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`--cache-file=FILE' - Enable the cache: use and save the results of the tests in FILE, - traditionally `config.cache'. FILE defaults to `/dev/null' to - disable caching. - -`--config-cache' -`-C' - Alias for `--cache-file=config.cache'. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. To - suppress all normal output, redirect it to `/dev/null' (any error - messages will still be shown). - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`configure' also accepts some other, not widely useful, options. Run -`configure --help' for more details. - diff --git a/libsysio/Makefile.am b/libsysio/Makefile.am deleted file mode 100644 index dfa4f9ebfca47ceb9fe467b89b71a3654737c907..0000000000000000000000000000000000000000 --- a/libsysio/Makefile.am +++ /dev/null @@ -1,89 +0,0 @@ -AUTOMAKE_OPTIONS=1.6 - -if WITH_TESTS -TESTDIR = tests -else -TESTDIR = -endif - -include $(top_srcdir)/src/module.mk -include $(top_srcdir)/include/module.mk -include $(top_srcdir)/tests/module.mk -include $(top_srcdir)/dev/stdfd/module.mk -include $(top_srcdir)/drivers/incore/module.mk -include $(top_srcdir)/drivers/native/module.mk -include $(top_srcdir)/drivers/yod/module.mk -include $(top_srcdir)/drivers/sockets/module.mk - -lib_LIBRARIES = ${LIBBUILD_DIR}/libsysio.a - -if WITH_STDFD_DEV -OPTIONAL_STDFD_SRCS = $(STDFD_SRCS) -else -OPTIONAL_STDFD_SRCS = -endif - -if WITH_INCORE_DRIVER -OPTIONAL_INCORE_SRCS = $(INCORE_SRCS) -else -OPTIONAL_INCORE_SRCS = -endif - -if WITH_NATIVE_DRIVER -OPTIONAL_NATIVE_SRCS = $(NATIVE_SRCS) -else -OPTIONAL_NATIVE_SRCS = -endif - -if WITH_SOCKETS_DRIVER -OPTIONAL_SOCKETS_SRCS = $(SOCKETS_SRCS) -else -OPTIONAL_SOCKETS_SRCS = -endif - -if WITH_CPLANT_YOD 
-OPTIONAL_YOD_SRCS = $(YOD_SRCS) -else -OPTIONAL_YOD_SRCS = -endif - -if WITH_LUSTRE_HACK -# it would be better that let configure script check this -OPTIONAL_LUSTRE_CFLAGS = -fPIC -endif - -AM_CFLAGS = $(OPTIONAL_LUSTRE_CFLAGS) - -__LIBBUILD_DIR__libsysio_a_SOURCES = \ - $(SRCDIR_SRCS) \ - $(OPTIONAL_STDFD_SRCS) \ - $(OPTIONAL_INCORE_SRCS) \ - $(OPTIONAL_SOCKETS_SRCS) \ - $(OPTIONAL_NATIVE_SRCS) \ - $(OPTIONAL_YOD_SRCS) - -include $(top_srcdir)/Rules.make - -EXTRA_DIST = Rules.make misc/init-env.sh $(TESTS_EXTRA) $(SRCDIR_EXTRA) \ - $(INCLUDE_EXTRA) $(STDFD_EXTRA) $(INCORE_EXTRA) \ - $(SOCKETS_EXTRA) $(NATIVE_EXTRA) $(YOD_EXTRA) - -AM_CPPFLAGS += ${YOD_DRIVER_FLAGS} - -really-clean: testsclean maintainer-clean - -rm -rf autom4te-2.53.cache - -rm -rf .deps - -rm -f Makefile.in - -rm -f compile depcomp INSTALL install-sh missing mkinstalldirs \ - configure aclocal.m4 - -rm -f config.guess config.sub - -rm -rf $(LIBBUILD_DIR) - -rm -f libsysio*.tar.gz - cd $(TESTDIR); rm -rf Makefile Makefile.in .deps - -tests: $(lib_LIBRARIES) FORCE - cd $(TESTDIR); make -testsclean: FORCE - cd $(TESTDIR); make clean -clean: testsclean clean-am -FORCE: diff --git a/libsysio/NEWS b/libsysio/NEWS deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/libsysio/README b/libsysio/README deleted file mode 100644 index 76b9b97874a08cbfdfe0d059655aaba0decb9cbb..0000000000000000000000000000000000000000 --- a/libsysio/README +++ /dev/null @@ -1,66 +0,0 @@ -Build ------ - -To bootstrap configuration: - -sh autogen.sh -./configure [options] - -Without the supported "--with" options only the core sysio library is -built. - -Option --with-native-driver=yes will cause the "native" host name space test -driver to be enabled and made available in drivers/native/libsysio_native.a -when built. This is set by default; Use "no" to disable. - -Option --with-tests=yes will cause the test programs in the tests directory -to be enabled. 
This is set by default; Use "no" to disable. - -Option --with-automount=<automount-file-name> will cause automount support -to be included. If <automount-file-name> is not supplied, a default value -of ".mount" will be used, matching the Lustre documentation. - -To build: - -Just `make' it. - -Automounts ----------- - -For a full description of this see the "Lustre Book" at: - <http://www.lustre.org/docs/lustre.pdf> - -In short, though, whenever a component is being looked up in a directory and -that directory has the "set-UID" bit set, then the directory is -searched for a special file. By default, that file is called ".mount" but -you may set it to any name using the --with-automount option described -earlier. - -If the content of that file is formatted exactly as: - -<file-system-type>:<source> - -then the <source> description is mounted on the directory containing the -special automount file, i.e. the directory being used as the parent in the lookup. If the -mount is successful, the parent is replaced with the newly mounted directory -and processing continues. If the mount fails, or the automount file -does not exist or cannot be read, everything continues as though the operation -had never been attempted. - -File systems, or volumes, or file-sets, or whatever they are called, that -have been automounted may also be automatically unmounted when resources -are required. They are not on a timer, unless the file system driver implements -one for them. They just disappear as resources are needed elsewhere. As they -were automatically mounted to begin with, they should re-establish as needed, -transparently. 
- -REDSTORM --------- - -The following works for me: - -#!/bin/sh - -export CFLAGS="-DREDSTORM -nostdinc -isystem /home/lee/REDSTORM/catamount/computeincs/i386 -isystem /home/lee/REDSTORM/catamount/include -g -W -Wall -ansi" - -sh configure --with-automount=".mount" --with-native=yes --with-incore=yes --with-stdfd=yes --with-tests=yes diff --git a/libsysio/Rules.make b/libsysio/Rules.make deleted file mode 100644 index e69c6a207d03179806d962be436af1a2a37d1952..0000000000000000000000000000000000000000 --- a/libsysio/Rules.make +++ /dev/null @@ -1,18 +0,0 @@ -if WITH_STDFD_DEV -STDFD_DEV_CPPFLAGS =-DSTDFD_DEV=1 -I$(top_srcdir)/dev/stdfd -else -STDFD_DEV_CPPFLAGS = -endif - -if WITH_SOCKETS_DRIVER -SOCKETS_CPPFLAGS=-DWITH_SOCKETS=1 -else -SOCKETS_CPPFLAGS= -endif - -DEV_CPPFLAGS = $(STDFD_DEV_CPPFLAGS) - -AM_CPPFLAGS = \ - $(TRACING) \ - $(AUTOMOUNT) $(ZERO_SUM_MEMORY) $(DEV_CPPFLAGS) $(SOCKETS_CPPFLAGS) \ - $(DEFER_INIT_CWD) $(SYSIO_LABEL_NAMES) -I$(top_srcdir)/include diff --git a/libsysio/autogen.sh b/libsysio/autogen.sh deleted file mode 100755 index 81ad5b65b2fa8981d7be82f9f5be259f7e698c4a..0000000000000000000000000000000000000000 --- a/libsysio/autogen.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -aclocal && -automake --add-missing --copy && -${AUTOCONF:-autoconf} diff --git a/libsysio/compile b/libsysio/compile deleted file mode 100755 index 9bb997a6a9b4549a733ed71f7e449e70c89f7a69..0000000000000000000000000000000000000000 --- a/libsysio/compile +++ /dev/null @@ -1,99 +0,0 @@ -#! /bin/sh - -# Wrapper for compilers which do not understand `-c -o'. - -# Copyright 1999, 2000 Free Software Foundation, Inc. -# Written by Tom Tromey <tromey@cygnus.com>. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Usage: -# compile PROGRAM [ARGS]... -# `-o FOO.o' (or `-o FOO.obj') is removed from the args passed to the actual compile. - -prog=$1 -shift - -ofile= -cfile= -args= -while test $# -gt 0; do - case "$1" in - -o) - # configure might choose to run compile as `compile cc -o foo foo.c'. - # So only a `-o' operand ending in .o or .obj is withheld; any other `-o FILE' is passed through in $args. - ofile=$2 - shift - case "$ofile" in - *.o | *.obj) - ;; - *) - args="$args -o $ofile" - ofile= - ;; - esac - ;; - *.c) - cfile=$1 - args="$args $1" - ;; - *) - args="$args $1" - ;; - esac - shift -done - -if test -z "$ofile" || test -z "$cfile"; then - # If no `-o' option was seen then we might have been invoked from a - # pattern rule where we don't need one. That is ok -- this is a - # normal compilation that the losing compiler can handle. If no - # `.c' file was seen then we are probably linking. That is also - # ok. - exec "$prog" $args -fi - -# Name of the file we expect the compiler to create: the source file's basename with `.c' replaced by `.o'. -cofile=`echo $cfile | sed -e 's|^.*/||' -e 's/\.c$/.o/'` - -# Create the lock directory. -# Note: use `[/.-]' here to ensure that we don't use the same name -# that we are using for the .o file. Also, base the name on the expected -# object file name, since that is what matters with a parallel build. 
-lockdir=`echo $cofile | sed -e 's|[/.-]|_|g'`.d -while true; do - if mkdir $lockdir > /dev/null 2>&1; then - break - fi - sleep 1 -done -# FIXME: race condition here if user kills between mkdir and trap. -trap "rmdir $lockdir; exit 1" 1 2 15 - -# Run the compile. -"$prog" $args -status=$? - -if test -f "$cofile"; then - mv "$cofile" "$ofile" -fi - -rmdir $lockdir -exit $status diff --git a/libsysio/config.guess b/libsysio/config.guess deleted file mode 100755 index 500ee74b047e6c6ea95e13d9786e01defba86328..0000000000000000000000000000000000000000 --- a/libsysio/config.guess +++ /dev/null @@ -1,1410 +0,0 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. - -timestamp='2003-10-03' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Originally written by Per Bothner <per@bothner.com>. -# Please send patches to <config-patches@gnu.org>. Submit a context -# diff and a properly formatted ChangeLog entry. 
-# -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. -# -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] - -Output the configuration name of the system \`$me' is run on. - -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to <config-patches@gnu.org>." - -version="\ -GNU config.guess ($timestamp) - -Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 -Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; - --version | -v ) - echo "$version" ; exit 0 ;; - --help | --h* | -h ) - echo "$usage"; exit 0 ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - * ) - break ;; - esac -done - -if test $# != 0; then - echo "$me: too many arguments$help" >&2 - exit 1 -fi - -trap 'exit 1' 1 2 15 - -# CC_FOR_BUILD -- compiler used by this script. Note that the use of a -# compiler to aid in system detection is discouraged as it requires -# temporary files to be created and, as you can see below, it is a -# headache to deal with in a portable fashion. - -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. 
- -# Portable tmp directory creation inspired by the Autoconf team. - -set_cc_for_build=' -trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; -trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; -: ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; -dummy=$tmp/dummy ; -tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; -case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; - for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then - CC_FOR_BUILD="$c"; break ; - fi ; - done ; - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found ; - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ;' - -# This is needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 1994-08-24) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown - -# Note: order is significant - the case branches are not exclusive. - -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in - *:NetBSD:*:*) - # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, - # *-*-netbsdecoff* and *-*-netbsd*. 
For targets that recently - # switched to ELF, *-*-netbsd* would select the old - # object file format. This provides both forward - # compatibility and a consistent mechanism for selecting the - # object file format. - # - # Note: NetBSD doesn't particularly care about the vendor - # portion of the name. We always set it to "unknown". - sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` - case "${UNAME_MACHINE_ARCH}" in - armeb) machine=armeb-unknown ;; - arm*) machine=arm-unknown ;; - sh3el) machine=shl-unknown ;; - sh3eb) machine=sh-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; - esac - # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. - case "${UNAME_MACHINE_ARCH}" in - arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build - if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null - then - # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). - # Return netbsd for either. FIX? - os=netbsd - else - os=netbsdelf - fi - ;; - *) - os=netbsd - ;; - esac - # The OS release - # Debian GNU/NetBSD machines have a different userland, and - # thus, need a distinct triplet. However, they do not need - # kernel version information, so it can be replaced with a - # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in - Debian*) - release='-gnu' - ;; - *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - ;; - esac - # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: - # contains redundant information, the shorter form: - # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
- echo "${machine}-${os}${release}" - exit 0 ;; - amiga:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - hp300:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - macppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvmeppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pmax:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sgi:OpenBSD:*:*) - echo mipseb-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sun3:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - wgrisc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - *:OpenBSD:*:*) - echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - alpha:OSF1:*:*) - if test $UNAME_RELEASE = "V4.0"; then - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - fi - # According to Compaq, /usr/sbin/psrinfo has been available on - # OSF/1 and Tru64 systems produced since 1995. I hope that - # covers most systems running today. This code pipes the CPU - # types through head -n 1, so we only detect the type of CPU 0. 
- ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` - case "$ALPHA_CPU_TYPE" in - "EV4 (21064)") - UNAME_MACHINE="alpha" ;; - "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; - "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; - "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; - "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; - "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; - "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; - "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; - "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; - "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; - "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; - "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; - "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; - esac - # A Vn.n version is a released version. - # A Tn.n version is a released field test version. - # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit 0 ;; - Alpha*:OpenVMS:*:*) - echo alpha-hp-vms - exit 0 ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? 
- echo alpha-pc-interix - exit 0 ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit 0 ;; - Amiga*:UNIX_System_V:4.0:*) - echo m68k-unknown-sysv4 - exit 0;; - *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos - exit 0 ;; - *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos - exit 0 ;; - *:OS/390:*:*) - echo i370-ibm-openedition - exit 0 ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} - exit 0;; - SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit 0;; - Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "`(/bin/universe) 2>/dev/null`" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit 0 ;; - NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit 0 ;; - DRS?6000:unix:4.0:6*) - echo sparc-icl-nx6 - exit 0 ;; - DRS?6000:UNIX_SV:4.2*:7*) - case `/usr/bin/uname -p` in - sparc) echo sparc-icl-nx7 && exit 0 ;; - esac ;; - sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - i86pc:SunOS:5.*:*) - echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in - Series*|S4*) - UNAME_RELEASE=`uname -v` - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. 
- echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit 0 ;; - sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} - exit 0 ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 - case "`/bin/arch`" in - sun3) - echo m68k-sun-sunos${UNAME_RELEASE} - ;; - sun4) - echo sparc-sun-sunos${UNAME_RELEASE} - ;; - esac - exit 0 ;; - aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} - exit 0 ;; - # The situation for MiNT is a little confusing. The machine name - # can be virtually everything (everything which is not - # "atarist" or "atariste" at least should have a processor - # > m68000). The system name ranges from "MiNT" over "FreeMiNT" - # to the lowercase version "mint" (or "freemint"). Finally - # the system name "TOS" denotes a system which is actually not - # MiNT. But MiNT is downward compatible to TOS, so this should - # be no problem. - atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit 0 ;; - hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit 0 ;; - *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit 0 ;; - powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} - exit 0 ;; - RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit 0 ;; - RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} - exit 
0 ;; - mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c -#ifdef __cplusplus -#include <stdio.h> /* for printf() prototype */ - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c \ - && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ - && exit 0 - echo mips-mips-riscos${UNAME_RELEASE} - exit 0 ;; - Motorola:PowerMAX_OS:*:*) - echo powerpc-motorola-powermax - exit 0 ;; - Motorola:*:4.3:PL8-*) - echo powerpc-harris-powermax - exit 0 ;; - Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - echo powerpc-harris-powermax - exit 0 ;; - Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit 0 ;; - m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit 0 ;; - m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit 0 ;; - m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit 0 ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] - then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] - then - echo m88k-dg-dgux${UNAME_RELEASE} - else - echo m88k-dg-dguxbcs${UNAME_RELEASE} - fi - else - echo i586-dg-dgux${UNAME_RELEASE} - fi - exit 0 ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit 0 ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit 0 ;; - XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - 
exit 0 ;; - Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit 0 ;; - *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit 0 ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. - echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i*86:AIX:*:*) - echo i386-ibm-aix - exit 0 ;; - ia64:AIX:*:*) - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} - exit 0 ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include <sys/systemcfg.h> - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 - echo rs6000-ibm-aix3.2.5 - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 - else - echo rs6000-ibm-aix3.2 - fi - exit 0 ;; - *:AIX:*:[45]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit 0 ;; - *:AIX:*:*) - echo rs6000-ibm-aix - exit 0 ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) - echo romp-ibm-bsd4.4 - exit 0 ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit 0 ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - echo rs6000-bull-bosx - exit 0 ;; - DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit 0 ;; - 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit 0 ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo 
m68k-hp-bsd4.4 - exit 0 ;; - 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? ) HP_ARCH=m68k ;; - 9000/[678][0-9][0-9]) - if [ -x /usr/bin/getconf ]; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac - fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - - #define _HPUX_SOURCE - #include <stdlib.h> - #include <unistd.h> - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` - test -z "$HP_ARCH" && HP_ARCH=hppa - fi ;; - esac - if [ ${HP_ARCH} = "hppa2.0w" ] - then - # avoid double evaluation of $set_cc_for_build - test -n "$CC_FOR_BUILD" || eval $set_cc_for_build - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null - then - HP_ARCH="hppa2.0w" - else - HP_ARCH="hppa64" - fi - fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit 0 ;; - ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo 
ia64-hp-hpux${HPUX_REV} - exit 0 ;; - 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include <unistd.h> - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. */ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 - echo unknown-hitachi-hiuxwe2 - exit 0 ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) - echo hppa1.1-hp-bsd - exit 0 ;; - 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit 0 ;; - *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit 0 ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) - echo hppa1.1-hp-osf - exit 0 ;; - hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit 0 ;; - i*86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk - else - echo ${UNAME_MACHINE}-unknown-osf1 - fi - exit 0 ;; - parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit 0 ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit 0 ;; - C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit 0 ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit 0 ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit 0 ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit 0 ;; - CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - 
CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ - -e 's/\.[^.]*$/.X/' - exit 0 ;; - CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - *:UNICOS/mp:*:*) - echo nv1-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; - i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit 0 ;; - sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; - *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; - *:FreeBSD:*:*|*:GNU/FreeBSD:*:*) - # Determine whether the default compiler uses glibc. - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include <features.h> - #if __GLIBC__ >= 2 - LIBC=gnu - #else - LIBC= - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - # GNU/FreeBSD systems have a "k" prefix to indicate we are using - # FreeBSD's kernel, but not the complete OS. 
- case ${LIBC} in gnu) kernel_only='k' ;; esac - echo ${UNAME_MACHINE}-unknown-${kernel_only}freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC} - exit 0 ;; - i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin - exit 0 ;; - i*:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 - exit 0 ;; - i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 - exit 0 ;; - x86:Interix*:[34]*) - echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//' - exit 0 ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit 0 ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? - echo i586-pc-interix - exit 0 ;; - i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin - exit 0 ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin - exit 0 ;; - prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - *:GNU:*:*) - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit 0 ;; - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix - exit 0 ;; - arm*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - cris:Linux:*:*) - echo cris-axis-linux-gnu - exit 0 ;; - ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - mips:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips - #undef mipsel - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo 
"${CPU}-unknown-linux-gnu" && exit 0 - ;; - mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips64 - #undef mips64el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 - ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit 0 ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit 0 ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null - if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} - exit 0 ;; - parisc:Linux:*:* | hppa:Linux:*:*) - # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-gnu ;; - PA8*) echo hppa2.0-unknown-linux-gnu ;; - *) echo hppa-unknown-linux-gnu ;; - esac - exit 0 ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu - exit 0 ;; - s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux - exit 0 ;; - sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu - exit 0 ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. - ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit 0 ;; - coff-i386) - echo "${UNAME_MACHINE}-pc-linux-gnucoff" - exit 0 ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. 
- echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit 0 ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include <features.h> - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #ifdef __INTEL_COMPILER - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0 - test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0 - ;; - i*86:DYNIX/ptx:4*:*) - # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. - # earlier versions are messed up and put the nodename in both - # sysname and nodename. - echo i386-sequent-sysv4 - exit 0 ;; - i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. - echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit 0 ;; - i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility - # is probably installed. 
- echo ${UNAME_MACHINE}-pc-os2-emx - exit 0 ;; - i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop - exit 0 ;; - i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos - exit 0 ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp - exit 0 ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} - else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} - fi - exit 0 ;; - i*86:*:5:[78]*) - case `/bin/uname -X | grep "^Machine"` in - *486*) UNAME_MACHINE=i486 ;; - *Pentium) UNAME_MACHINE=i586 ;; - *Pent*|*Celeron) UNAME_MACHINE=i686 ;; - esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - exit 0 ;; - i*86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name` - echo ${UNAME_MACHINE}-pc-isc$UNAME_REL - elif /bin/uname -X 2>/dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` - (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ - && UNAME_MACHINE=i686 - (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ - && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL - else - echo ${UNAME_MACHINE}-pc-sysv32 - fi - exit 0 ;; - pc:*:*:*) - # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i386. 
- echo i386-pc-msdosdjgpp - exit 0 ;; - Intel:Mach:3*:*) - echo i386-pc-mach3 - exit 0 ;; - paragon:*:*:*) - echo i860-intel-osf1 - exit 0 ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. - echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 - fi - exit 0 ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - echo m68010-convergent-sysv - exit 0 ;; - mc68k:UNIX:SYSTEM5:3.51m) - echo m68k-convergent-sysv - exit 0 ;; - M680?0:D-NIX:5.3:*) - echo m68k-diab-dnix - exit 0 ;; - M68*:*:R3V[567]*:*) - test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; - 3[34]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4.3${OS_REL} && exit 0 - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4 && exit 0 ;; - m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit 0 ;; - TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} - exit 0 ;; - RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit 0 ;; - RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit 0 ;; - 
*:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 - else - echo ns32k-sni-sysv - fi - exit 0 ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says <Richard.M.Bartel@ccMail.Census.GOV> - echo i586-unisys-sysv4 - exit 0 ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes <hewes@openmarket.com>. - # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit 0 ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. - echo i860-stratus-sysv4 - exit 0 ;; - *:VOS:*:*) - # From Paul.Green@stratus.com. - echo hppa1.1-stratus-vos - exit 0 ;; - mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} - exit 0 ;; - news*:NEWS-OS:6*:*) - echo mips-sony-newsos6 - exit 0 ;; - R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} - else - echo mips-unknown-sysv${UNAME_RELEASE} - fi - exit 0 ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit 0 ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit 0 ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
- echo i586-pc-beos - exit 0 ;; - SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} - exit 0 ;; - SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} - exit 0 ;; - SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} - exit 0 ;; - Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; - *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; - *:Darwin:*:*) - case `uname -p` in - *86) UNAME_PROCESSOR=i686 ;; - powerpc) UNAME_PROCESSOR=powerpc ;; - esac - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} - exit 0 ;; - *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then - UNAME_PROCESSOR=i386 - UNAME_MACHINE=pc - fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} - exit 0 ;; - *:QNX:*:4*) - echo i386-pc-qnx - exit 0 ;; - NSR-[DGKLNPTVWY]:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} - exit 0 ;; - *:NonStop-UX:*:*) - echo mips-compaq-nonstopux - exit 0 ;; - BS2000:POSIX*:*:*) - echo bs2000-siemens-sysv - exit 0 ;; - DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} - exit 0 ;; - *:Plan9:*:*) - # "uname -m" is not consistent, so use $cputype instead. 386 - # is converted to i386 for consistency with other x86 - # operating systems. 
- if test "$cputype" = "386"; then - UNAME_MACHINE=i386 - else - UNAME_MACHINE="$cputype" - fi - echo ${UNAME_MACHINE}-unknown-plan9 - exit 0 ;; - *:TOPS-10:*:*) - echo pdp10-unknown-tops10 - exit 0 ;; - *:TENEX:*:*) - echo pdp10-unknown-tenex - exit 0 ;; - KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - echo pdp10-dec-tops20 - exit 0 ;; - XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - echo pdp10-xkl-tops20 - exit 0 ;; - *:TOPS-20:*:*) - echo pdp10-unknown-tops20 - exit 0 ;; - *:ITS:*:*) - echo pdp10-unknown-its - exit 0 ;; - SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} - exit 0 ;; -esac - -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - -eval $set_cc_for_build -cat >$dummy.c <<EOF -#ifdef _SEQUENT_ -# include <sys/types.h> -# include <sys/utsname.h> -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... 
*/ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include <sys/param.h> - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -# if !defined (ultrix) -# include <sys/param.h> -# if defined (BSD) -# if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -# else -# if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# endif -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# else - printf ("vax-dec-ultrix\n"); exit (0); -# endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0 - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit 0 ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit 0 ;; - c34*) - echo c34-convex-bsd - exit 0 ;; - c38*) - echo c38-convex-bsd - exit 0 ;; - c4*) - echo c4-convex-bsd - exit 0 ;; - esac -fi - -cat >&2 <<EOF -$0: unable to guess system type - -This script, last modified $timestamp, has failed to recognize -the operating system you are using. It is advised that you -download the most up to date version of the config scripts from - - ftp://ftp.gnu.org/pub/gnu/config/ - -If the version you run ($0) is already up to date, please -send the following data and any information you think might be -pertinent to <config-patches@gnu.org> in order to provide the needed -information to handle your system. 
- -config.guess timestamp = $timestamp - -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` - -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` - -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} -EOF - -exit 1 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/libsysio/config.sub b/libsysio/config.sub deleted file mode 100755 index 1f31816b97a13e14c63f276bea226a8dde6887b5..0000000000000000000000000000000000000000 --- a/libsysio/config.sub +++ /dev/null @@ -1,1510 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. - -timestamp='2003-08-18' - -# This file is (in principle) common to ALL GNU software. -# The presence of a machine in this file suggests that SOME GNU software -# can handle that machine. It does not imply ALL GNU software can. -# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Please send patches to <config-patches@gnu.org>. Submit a context -# diff and a properly formatted ChangeLog entry. -# -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. - -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. 
- -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS - -Canonicalize a configuration name. - -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to <config-patches@gnu.org>." - -version="\ -GNU config.sub ($timestamp) - -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 -Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; - --version | -v ) - echo "$version" ; exit 0 ;; - --help | --h* | -h ) - echo "$usage"; exit 0 ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" - exit 1 ;; - - *local*) - # First pass through any local machine types. - echo $1 - exit 0;; - - * ) - break ;; - esac -done - -case $# in - 0) echo "$me: missing argument$help" >&2 - exit 1;; - 1) ;; - *) echo "$me: too many arguments$help" >&2 - exit 1;; -esac - -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. 
-maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - nto-qnx* | linux-gnu* | linux-dietlibc | kfreebsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac - -### Let's recognize common machines as not being operating systems so -### that things like config.sub decstation-3100 work. We also -### recognize some manufacturers as not being operating systems, so we -### can provide default operating systems below. -case $os in - -sun*os*) - # Prevent following clause from handling this invalid input. - ;; - -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ - -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ - -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ - -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ - -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ - -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis) - os= - basic_machine=$1 - ;; - -sim | -cisco | -oki | -wec | -winbond) - os= - basic_machine=$1 - ;; - -scout) - ;; - -wrs) - os=-vxworks - basic_machine=$1 - ;; - -chorusos*) - os=-chorusos - basic_machine=$1 - ;; - -chorusrdb) - os=-chorusrdb - basic_machine=$1 - ;; - -hiux*) - os=-hiuxwe2 - ;; - -sco5) - os=-sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco4) - os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2.[4-9]*) - os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2v[4-9]*) - # Don't forget version if it is 3.2v4 or newer. 
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco*) - os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -udk*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -isc) - os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -clix*) - basic_machine=clipper-intergraph - ;; - -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -lynx*) - os=-lynxos - ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` - ;; - -psos*) - os=-psos - ;; - -mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; -esac - -# Decode aliases for certain CPU-COMPANY combinations. -case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. - 1750a | 580 \ - | a29k \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ - | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ - | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ - | c4x | clipper \ - | d10v | d30v | dlx | dsp16xx \ - | fr30 | frv \ - | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ - | i370 | i860 | i960 | ia64 \ - | ip2k | iq2000 \ - | m32r | m68000 | m68k | m88k | mcore \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64el \ - | mips64vr | mips64vrel \ - | mips64orion | mips64orionel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | mipsisa64sr71kel \ - | mipstx39 | mipstx39el \ - | mn10200 | mn10300 \ - | msp430 \ - | ns16k | ns32k \ - | openrisc | or32 \ - | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | 
powerpc64le | powerpcle | ppcbe \ - | pyramid \ - | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ - | sh64 | sh64le \ - | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \ - | strongarm \ - | tahoe | thumb | tic4x | tic80 | tron \ - | v850 | v850e \ - | we32k \ - | x86 | xscale | xstormy16 | xtensa \ - | z8k) - basic_machine=$basic_machine-unknown - ;; - m6811 | m68hc11 | m6812 | m68hc12) - # Motorola 68HC11/12. - basic_machine=$basic_machine-unknown - os=-none - ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) - ;; - - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i*86 | x86_64) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. 
- 580-* \ - | a29k-* \ - | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ - | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ - | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* \ - | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ - | clipper-* | cydra-* \ - | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ - | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ - | h8300-* | h8500-* \ - | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ - | i*86-* | i860-* | i960-* | ia64-* \ - | ip2k-* | iq2000-* \ - | m32r-* \ - | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | mcore-* \ - | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ - | mips16-* \ - | mips64-* | mips64el-* \ - | mips64vr-* | mips64vrel-* \ - | mips64orion-* | mips64orionel-* \ - | mips64vr4100-* | mips64vr4100el-* \ - | mips64vr4300-* | mips64vr4300el-* \ - | mips64vr5000-* | mips64vr5000el-* \ - | mipsisa32-* | mipsisa32el-* \ - | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa64-* | mipsisa64el-* \ - | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64sb1-* | mipsisa64sb1el-* \ - | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipstx39-* | mipstx39el-* \ - | msp430-* \ - | none-* | np1-* | nv1-* | ns16k-* | ns32k-* \ - | orion-* \ - | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ - | pyramid-* \ - | romp-* | rs6000-* \ - | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \ - | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \ - | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ - | tahoe-* | thumb-* \ - | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ - | tron-* \ - | v850-* | v850e-* | vax-* \ - | we32k-* \ - | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \ - | xtensa-* \ - 
| ymp-* \ - | z8k-*) - ;; - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. - 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att - ;; - 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amd64) - basic_machine=x86_64-pc - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-unknown - ;; - amigaos | amigados) - basic_machine=m68k-unknown - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - c90) - basic_machine=c90-cray - os=-unicos - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - cray | j90) - basic_machine=j90-cray - os=-unicos - ;; - crds | unos) - basic_machine=m68k-crds - ;; - cris | cris-* | etrax*) - basic_machine=cris-axis - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec - ;; - decsystem10* | dec10*) - basic_machine=pdp10-dec - os=-tops10 - ;; - decsystem20* | dec20*) - basic_machine=pdp10-dec - os=-tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - 
;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd - ;; - encore | umax | mmax) - basic_machine=ns32k-encore - ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - os=-ose - ;; - fx2800) - basic_machine=i860-alliant - ;; - genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - go32) - basic_machine=i386-pc - os=-go32 - ;; - h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp - ;; - hp9k3[2-9][0-9]) - basic_machine=m68k-hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm - ;; -# I'm not sure what "Sysv32" means. 
Should this be sysv3.2? - i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 - ;; - i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 - ;; - i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv - ;; - i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - basic_machine=i386-mach - os=-mach - ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta - ;; - iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) - ;; - *) - os=-irix4 - ;; - esac - ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - mingw32) - basic_machine=i386-pc - os=-mingw32 - ;; - miniframe) - basic_machine=m68000-convergent - ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - mmix*) - basic_machine=mmix-knuth - os=-mmixware - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - morphos) - basic_machine=powerpc-unknown - os=-morphos - ;; - msdos) - basic_machine=i386-pc - os=-msdos - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos - ;; - news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - ;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) - ;; - -ns2*) - os=-nextstep2 - ;; - *) - os=-nextstep3 - ;; - esac - ;; - 
nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - nonstopux) - basic_machine=mips-compaq - os=-nonstopux - ;; - np1) - basic_machine=np1-gould - ;; - nv1) - basic_machine=nv1-cray - os=-unicosmp - ;; - nsr-tandem) - basic_machine=nsr-tandem - ;; - op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - or32 | or32-*) - basic_machine=or32-unknown - os=-coff - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k - ;; - pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - pbd) - basic_machine=sparc-tti - ;; - pbb) - basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pentium | p5 | k5 | k6 | nexgen | viac3) - basic_machine=i586-pc - ;; - pentiumpro | p6 | 6x86 | athlon | athlon_*) - basic_machine=i686-pc - ;; - pentiumii | pentium2 | pentiumiii | pentium3) - basic_machine=i686-pc - ;; - pentium4) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pn) - basic_machine=pn-gould - ;; - power) basic_machine=power-ibm - ;; - ppc) basic_machine=powerpc-unknown - ;; - ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppcle | powerpclittle | ppc-le | powerpc-little) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` 
- ;; - ppc64) basic_machine=powerpc64-unknown - ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) - basic_machine=powerpc64le-unknown - ;; - ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ps2) - basic_machine=i386-ibm - ;; - pw32) - basic_machine=i586-unknown - os=-pw32 - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff - ;; - rm[46]00) - basic_machine=mips-siemens - ;; - rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - s390 | s390-*) - basic_machine=s390-ibm - ;; - s390x | s390x-*) - basic_machine=s390x-ibm - ;; - sa29200) - basic_machine=a29k-amd - os=-udi - ;; - sb1) - basic_machine=mipsisa64sb1-unknown - ;; - sb1el) - basic_machine=mipsisa64sb1el-unknown - ;; - sei) - basic_machine=mips-sei - os=-seiux - ;; - sequent) - basic_machine=i386-sequent - ;; - sh) - basic_machine=sh-hitachi - os=-hms - ;; - sh64) - basic_machine=sh64-unknown - ;; - sparclite-wrs | simso-wrs) - basic_machine=sparclite-wrs - os=-vxworks - ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 - ;; - spur) - basic_machine=spur-unknown - ;; - st2000) - basic_machine=m68k-tandem - ;; - stratus) - basic_machine=i860-stratus - os=-sysv4 - ;; - sun2) - basic_machine=m68000-sun - ;; - sun2os3) - basic_machine=m68000-sun - os=-sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - os=-sunos4 - ;; - sun3os3) - basic_machine=m68k-sun - os=-sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - os=-sunos4 - ;; - sun4os3) - basic_machine=sparc-sun - os=-sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - os=-sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - os=-solaris2 - ;; - sun3 | sun3-*) - basic_machine=m68k-sun - ;; - sun4) - basic_machine=sparc-sun - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - ;; - sv1) - basic_machine=sv1-cray - os=-unicos - ;; - symmetry) - basic_machine=i386-sequent - os=-dynix - ;; - t3e) - 
basic_machine=alphaev5-cray - os=-unicos - ;; - t90) - basic_machine=t90-cray - os=-unicos - ;; - tic54x | c54x*) - basic_machine=tic54x-unknown - os=-coff - ;; - tic55x | c55x*) - basic_machine=tic55x-unknown - os=-coff - ;; - tic6x | c6x*) - basic_machine=tic6x-unknown - os=-coff - ;; - tx39) - basic_machine=mipstx39-unknown - ;; - tx39el) - basic_machine=mipstx39el-unknown - ;; - toad1) - basic_machine=pdp10-xkl - os=-tops20 - ;; - tower | tower-32) - basic_machine=m68k-ncr - ;; - udi29k) - basic_machine=a29k-amd - os=-udi - ;; - ultra3) - basic_machine=a29k-nyu - os=-sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - os=-none - ;; - vaxv) - basic_machine=vax-dec - os=-sysv - ;; - vms) - basic_machine=vax-dec - os=-vms - ;; - vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; - vxworks960) - basic_machine=i960-wrs - os=-vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - os=-vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - os=-vxworks - ;; - w65*) - basic_machine=w65-wdc - os=-none - ;; - w89k-*) - basic_machine=hppa1.1-winbond - os=-proelf - ;; - xps | xps100) - basic_machine=xps100-honeywell - ;; - ymp) - basic_machine=ymp-cray - os=-unicos - ;; - z8k-*-coff) - basic_machine=z8k-unknown - os=-sim - ;; - none) - basic_machine=none-none - os=-none - ;; - -# Here we handle the default manufacturer of certain CPU types. It is in -# some cases the only manufacturer, in others, it is the most popular. 
- w89k) - basic_machine=hppa1.1-winbond - ;; - op50n) - basic_machine=hppa1.1-oki - ;; - op60c) - basic_machine=hppa1.1-oki - ;; - romp) - basic_machine=romp-ibm - ;; - rs6000) - basic_machine=rs6000-ibm - ;; - vax) - basic_machine=vax-dec - ;; - pdp10) - # there are many clones, so DEC is not a safe bet - basic_machine=pdp10-unknown - ;; - pdp11) - basic_machine=pdp11-dec - ;; - we32k) - basic_machine=we32k-att - ;; - sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele) - basic_machine=sh-unknown - ;; - sh64) - basic_machine=sh64-unknown - ;; - sparc | sparcv9 | sparcv9b) - basic_machine=sparc-sun - ;; - cydra) - basic_machine=cydra-cydrome - ;; - orion) - basic_machine=orion-highlevel - ;; - orion105) - basic_machine=clipper-highlevel - ;; - mac | mpw | mac-mpw) - basic_machine=m68k-apple - ;; - pmac | pmac-mpw) - basic_machine=powerpc-apple - ;; - *-unknown) - # Make sure to match an already-canonicalized machine name. - ;; - *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` - ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if [ x"$os" != x"" ] -then -case $os in - # First match some system type aliases - # that might get confused with valid system types. - # -solaris* is a basic system type, with this one exception. - -solaris1 | -solaris1.*) - os=`echo $os | sed -e 's|solaris1|sunos4|'` - ;; - -solaris) - os=-solaris2 - ;; - -svr4*) - os=-sysv4 - ;; - -unixware*) - os=-sysv4.2uw - ;; - -gnu/linux*) - os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` - ;; - # First accept the basic system types. - # The portable systems comes first. - # Each alternative MUST END IN A *, to match a version number. 
- # -sysv* is not here because it comes later, after sysvr4. - -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ - | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* \ - | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ - | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -netbsd* | -openbsd* | -kfreebsd* | -freebsd* | -riscix* \ - | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ - | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ - | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* \ - | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \ - | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ - | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ - | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ - | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ - | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei*) - # Remember, each alternative MUST END IN *, to match a version number. 
- ;; - -qnx*) - case $basic_machine in - x86-* | i*86-*) - ;; - *) - os=-nto$os - ;; - esac - ;; - -nto-qnx*) - ;; - -nto*) - os=`echo $os | sed -e 's|nto|nto-qnx|'` - ;; - -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ - | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) - ;; - -mac*) - os=`echo $os | sed -e 's|mac|macos|'` - ;; - -linux-dietlibc) - os=-linux-dietlibc - ;; - -linux*) - os=`echo $os | sed -e 's|linux|linux-gnu|'` - ;; - -sunos5*) - os=`echo $os | sed -e 's|sunos5|solaris2|'` - ;; - -sunos6*) - os=`echo $os | sed -e 's|sunos6|solaris3|'` - ;; - -opened*) - os=-openedition - ;; - -wince*) - os=-wince - ;; - -osfrose*) - os=-osfrose - ;; - -osf*) - os=-osf - ;; - -utek*) - os=-bsd - ;; - -dynix*) - os=-bsd - ;; - -acis*) - os=-aos - ;; - -atheos*) - os=-atheos - ;; - -386bsd) - os=-bsd - ;; - -ctix* | -uts*) - os=-sysv - ;; - -nova*) - os=-rtmk-nova - ;; - -ns2 ) - os=-nextstep2 - ;; - -nsk*) - os=-nsk - ;; - # Preserve the version number of sinix5. - -sinix5.*) - os=`echo $os | sed -e 's|sinix|sysv|'` - ;; - -sinix*) - os=-sysv4 - ;; - -triton*) - os=-sysv3 - ;; - -oss*) - os=-sysv3 - ;; - -svr4) - os=-sysv4 - ;; - -svr3) - os=-sysv3 - ;; - -sysvr4) - os=-sysv4 - ;; - # This must come after -sysvr4. - -sysv*) - ;; - -ose*) - os=-ose - ;; - -es1800*) - os=-ose - ;; - -xenix) - os=-xenix - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - os=-mint - ;; - -aros*) - os=-aros - ;; - -kaos*) - os=-kaos - ;; - -none) - ;; - *) - # Get rid of the `-' at the beginning of $os. - os=`echo $os | sed 's/[^-]*-//'` - echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 - exit 1 - ;; -esac -else - -# Here we handle the default operating systems that come with various machines. -# The value should be what the vendor currently ships out the door with their -# machine or put another way, the most popular os provided with the machine. 
- -# Note that if you're going to try to match "-MANUFACTURER" here (say, -# "-sun"), then you have to tell the case statement up towards the top -# that MANUFACTURER isn't an operating system. Otherwise, code above -# will signal an error saying that MANUFACTURER isn't an operating -# system, and we'll never get to this point. - -case $basic_machine in - *-acorn) - os=-riscix1.2 - ;; - arm*-rebel) - os=-linux - ;; - arm*-semi) - os=-aout - ;; - c4x-* | tic4x-*) - os=-coff - ;; - # This must come before the *-dec entry. - pdp10-*) - os=-tops20 - ;; - pdp11-*) - os=-none - ;; - *-dec | vax-*) - os=-ultrix4.2 - ;; - m68*-apollo) - os=-domain - ;; - i386-sun) - os=-sunos4.0.2 - ;; - m68000-sun) - os=-sunos3 - # This also exists in the configure program, but was not the - # default. - # os=-sunos4 - ;; - m68*-cisco) - os=-aout - ;; - mips*-cisco) - os=-elf - ;; - mips*-*) - os=-elf - ;; - or32-*) - os=-coff - ;; - *-tti) # must be before sparc entry or we get the wrong os. - os=-sysv3 - ;; - sparc-* | *-sun) - os=-sunos4.1.1 - ;; - *-be) - os=-beos - ;; - *-ibm) - os=-aix - ;; - *-wec) - os=-proelf - ;; - *-winbond) - os=-proelf - ;; - *-oki) - os=-proelf - ;; - *-hp) - os=-hpux - ;; - *-hitachi) - os=-hiux - ;; - i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv - ;; - *-cbm) - os=-amigaos - ;; - *-dg) - os=-dgux - ;; - *-dolphin) - os=-sysv3 - ;; - m68k-ccur) - os=-rtu - ;; - m88k-omron*) - os=-luna - ;; - *-next ) - os=-nextstep - ;; - *-sequent) - os=-ptx - ;; - *-crds) - os=-unos - ;; - *-ns) - os=-genix - ;; - i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 - ;; - *-gould) - os=-sysv - ;; - *-highlevel) - os=-bsd - ;; - *-encore) - os=-bsd - ;; - *-sgi) - os=-irix - ;; - *-siemens) - os=-sysv4 - ;; - *-masscomp) - os=-rtu - ;; - f30[01]-fujitsu | f700-fujitsu) - os=-uxpv - ;; - *-rom68k) - os=-coff - ;; - *-*bug) - os=-coff - ;; - *-apple) - os=-macos - ;; - *-atari*) - os=-mint - ;; - *) - os=-none - ;; -esac -fi - -# Here we handle the 
case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. -vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) - vendor=acorn - ;; - -sunos*) - vendor=sun - ;; - -aix*) - vendor=ibm - ;; - -beos*) - vendor=be - ;; - -hpux*) - vendor=hp - ;; - -mpeix*) - vendor=hp - ;; - -hiux*) - vendor=hitachi - ;; - -unos*) - vendor=crds - ;; - -dgux*) - vendor=dg - ;; - -luna*) - vendor=omron - ;; - -genix*) - vendor=ns - ;; - -mvs* | -opened*) - vendor=ibm - ;; - -ptx*) - vendor=sequent - ;; - -vxsim* | -vxworks* | -windiss*) - vendor=wrs - ;; - -aux*) - vendor=apple - ;; - -hms*) - vendor=hitachi - ;; - -mpw* | -macos*) - vendor=apple - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - vendor=atari - ;; - -vos*) - vendor=stratus - ;; - esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` - ;; -esac - -echo $basic_machine$os -exit 0 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/libsysio/configure.in b/libsysio/configure.in deleted file mode 100644 index a0e77f34e52e20154ad38f29ef376874d611ab45..0000000000000000000000000000000000000000 --- a/libsysio/configure.in +++ /dev/null @@ -1,464 +0,0 @@ -AC_INIT(libsysio, 1.2) - -AC_CANONICAL_HOST - -case "$host_os" in - aix*) - ;; - linux*) - ;; - *) - AC_MSG_WARN('***' ${host_os}: Unsupported OS target) - ;; -esac - -AM_INIT_AUTOMAKE([subdir-objects]) -AC_PROG_CC -AM_PROG_CC_C_O - -AC_PROG_RANLIB -AC_PROG_MAKE_SET -AC_HEADER_STDC -AC_HEADER_STAT -AC_HEADER_TIME - -have_lib_dir=yes; -AC_ARG_WITH(lib-dir, - AC_HELP_STRING([--with-lib-dir=<sysio lib build directory>], - [directory for sysio library]), - [ case "${withval}" in - "yes"|"no"|"") have_lib_dir=no ;; - *) LIBBUILD_DIR=${withval}; - test -d ${LIBBUILD_DIR} || mkdir ${LIBBUILD_DIR} || - have_lib_dir=no;; - esac;], - [ LIBBUILD_DIR=`pwd`/lib; - 
test -d ${LIBBUILD_DIR} || mkdir ${LIBBUILD_DIR} || have_lib_dir=no;]) -if test x${have_lib_dir} = xyes; then - echo "Using sysio library directory ${LIBBUILD_DIR}" -else - AC_MSG_ERROR(Need writeable path to sysio library directory ${LIBBUILD_DIR}) -fi -AC_SUBST(LIBBUILD_DIR) - -AC_ARG_WITH(native_driver, - AC_HELP_STRING([--with-native-driver],[build native test driver]), - [ case "${withval}" in - yes) ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-native-driver) ;; - esac;], - [with_native_driver=yes;]) -AM_CONDITIONAL(WITH_NATIVE_DRIVER, test x$with_native_driver = xyes) - -AC_ARG_WITH(incore-driver, - AC_HELP_STRING([--with-incore-driver],[build incore test driver]), - [ case "${withval}" in - yes) ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-incore-driver) ;; - esac], - [with_incore_driver=yes]) -AM_CONDITIONAL(WITH_INCORE_DRIVER, test x$with_incore_driver = xyes) - -AC_ARG_WITH(tests, - AC_HELP_STRING([--with-tests],[build tests]), - [ case "${withval}" in - yes) ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-tests) ;; - esac], - [with_tests=yes]) -AM_CONDITIONAL(WITH_TESTS, test x$with_tests = xyes) - -AC_ARG_WITH(automount, - AC_HELP_STRING([--with-automount@<:@=<automount-file-name>@:>@], - [with automounts @<:@<automount-file-name>=.mount@:>@]), - [ if test x${withval} = xyes; then - AUTOMOUNT=-DAUTOMOUNT_FILE_NAME="\\\".mount\\\"" - elif test x${withval} != x; then - AUTOMOUNT=-DAUTOMOUNT_FILE_NAME="\\\"${withval}\\\"" - fi]) -AC_SUBST(AUTOMOUNT) - -AC_ARG_WITH(stdfd-dev, - AC_HELP_STRING([--with-stdfd-dev], - [build standard file descriptors pseudo-driver]), - [ case "${withval}" in - yes) ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-stdfd-dev) ;; - esac], - [with_stdfd_dev=yes]) -AM_CONDITIONAL(WITH_STDFD_DEV, test x$with_stdfd_dev = xyes) - -AC_ARG_WITH(zero-sum-memory, - AC_HELP_STRING([--with-zero-sum-memory], - [free all dynamically allocated memory at the end -- useful for 
debugging]), - [ case "${withval}" in - yes) ZERO_SUM_MEMORY=-DZERO_SUM_MEMORY=1 ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-zero-sum-memory) ;; - esac], - [with_zero_sum_memory=no]) -AC_SUBST(ZERO_SUM_MEMORY) - -AC_ARG_WITH(defer-init-cwd, - AC_HELP_STRING([--with-defer-init-cwd], - [defer initialization of current working directory]), - [ case "${withval}" in - yes) DEFER_INIT_CWD=-DDEFER_INIT_CWD=1 ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-defer-init-cwd) ;; - esac], - [with_defer_init_cwd=no]) -AC_SUBST(DEFER_INIT_CWD) - -AC_ARG_WITH(tracing, - AC_HELP_STRING([--with-tracing], - [enable tracing support]), - [ case "${withval}" in - yes) TRACING=-DSYSIO_TRACING=1 ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-tracing) ;; - esac], - [TRACING=-DSYSIO_TRACING=1]) -AC_SUBST(TRACING) - -AC_ARG_WITH(cplant_yod, - AC_HELP_STRING([--with-cplant-yod],[build cplant yod I/O driver]), - [ case "${withval}" in - yes) if test x${with_stdfd_dev} != xyes; then - with_stdfd_dev=yes - AM_CONDITIONAL(WITH_STDFD_DEV, test x$with_stdfd_dev = xyes) - fi ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-cplant-yod);; - esac], - [with_cplant_yod=no]) -AM_CONDITIONAL(WITH_CPLANT_YOD, test x$with_cplant_yod = xyes) - -AC_ARG_WITH(cplant_tests, - AC_HELP_STRING([--with-cplant-tests=<cplant-build-path>], - [build libsysio tests for cplant platform]), - [ case "${withval}" in - yes) AC_MSG_ERROR(need path to compiler for --with-cplant-tests);; - no) with_cplant_tests=no;; - *) CC=${withval} - CCDEPMODE=${CC} - CPP="${CC} -E" - AC_CHECK_FILE(${CC}, - [ if test x${with_cplant_yod} != xyes; then - with_cplant_yod=yes - AM_CONDITIONAL(WITH_CPLANT_YOD, test x$with_cplant_yod = xyes) - fi], - [ AC_MSG_ERROR(path not found ${CC} for --with-cplant-tests) ]);; - esac], - [with_cplant_tests=no]) -AM_CONDITIONAL(WITH_CPLANT_TESTS, test x$with_cplant_tests != xno) - -AC_ARG_WITH(sockets, - AC_HELP_STRING([--with-sockets], - [build sockets 
interface driver (EXPERIMENTAL)]), - [ case "${withval}" in - yes) ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-sockets) ;; - esac], - [with_sockets=no]) -AM_CONDITIONAL(WITH_SOCKETS_DRIVER, test x$with_sockets = xyes) - -AC_ARG_WITH(lustre-hack, - AC_HELP_STRING([--with-lustre-hack], - [have hacking code which needed to support liblustre driver (EXPERIMENTAL)]), - [ case "${withval}" in - yes) ;; - no) ;; - *) AC_MSG_ERROR(bad value ${withval} for --with-lustre-hack) ;; - esac], - [with_lustre_hack=no]) -AM_CONDITIONAL(WITH_LUSTRE_HACK, test x$with_lustre_hack = xyes) -if test x$with_lustre_hack = xyes; then - AC_DEFINE(HAVE_LUSTRE_HACK) -fi - -AC_ARG_WITH(alternate-symbols, - AC_HELP_STRING([--with-alternate-symbols@<:@=<qualifier>@:>@], - [Prepend standard, public, symbols with a unique qualifer]), - [ case "${withval}" in - yes) SYSIO_LABEL_NAMES=-DSYSIO_LABEL_NAMES=sysio_ ;; - no) ;; - *) SYSIO_LABEL_NAMES=-DSYSIO_LABEL_NAMES="${withval}" ;; - esac]) -AC_SUBST(SYSIO_LABEL_NAMES) - -# We keep the original values in `$config_*' and never modify them, so we -# can write them unchanged into config.make. Everything else uses -# $machine, $vendor, and $os, and changes them whenever convenient. -config_machine=$host_cpu config_vendor=$host_vendor config_os=$host_os - -# Don't allow vendor == "unknown" -test "$config_vendor" = unknown && config_vendor= -config_os="`echo $config_os | sed 's/^unknown-//'`" - -# Some configurations imply other options. -case "$host_os" in - gnu* | linux* | bsd4.4* | netbsd* | freebsd*) - # These systems always use GNU tools. - gnu_ld=yes gnu_as=yes ;; -esac -case "$host_os" in - # i586-linuxaout is mangled into i586-pc-linux-gnuaout - linux*ecoff* | linux*aout* | gnu*aout* | gnu*ecoff*) - ;; - gnu* | linux* | freebsd* | netbsd* | sysv4* | solaris2* | irix6*) - # These systems (almost) always use the ELF format. 
- elf=yes - ;; - aix*) - # These systems are always xcoff - xcoff=yes - elf=no - ;; -esac - -machine=$config_machine -vendor=$config_vendor -os=$config_os - -# config.guess on some IBM machines says `rs6000' instead of `powerpc'. -# Unify this here. -if test "$machine" = rs6000; then - machine="powerpc" -fi - -case "$host_os" in - gnu* | linux*) - AC_DEFINE(_XOPEN_SOURCE, 600) - ;; - aix*) - # ... and always needed... - AC_DEFINE(__USE_LARGEFILE64) - AC_DEFINE(_LARGE_FILES) - AC_DEFINE(_LARGE_FILE_API) - AC_DEFINE(_ALL_SOURCE) - AC_DEFINE(_XOPEN_SOURCE_EXTENDED) - ;; -esac - -AC_MSG_CHECKING(for symlink support) -AC_TRY_COMPILE([ -#include <sys/types.h> -#include <sys/stat.h> -],[ -#ifndef S_ISLNK -#error -#endif -], - symlink_support="yes", - symlink_support="no" -) -AC_MSG_RESULT($symlink_support) - -if test x$symlink_support = xyes; then - AC_MSG_CHECKING(if readlink returns int) - AC_TRY_COMPILE([ -#include <unistd.h> - ],[ - extern int readlink(const char *, char *, size_t); - ], - readlink_returns_int="yes", - readlink_returns_int="no" - ) - AC_MSG_RESULT($readlink_returns_int) - if test x$readlink_returns_int = no; then - AC_DEFINE(HAVE_POSIX_1003_READLINK, - 1, - [readlink returns ssize_t]) - fi -fi - -AC_MSG_CHECKING(if readlink returns ssize_t) -AC_TRY_COMPILE([ - #include <unistd.h> -],[ - ssize_t readlink(const char *, char *, size_t); -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_POSIX_1003_READLINK, 1, [readlink returns ssize_t]) -],[ - AC_MSG_RESULT([no]) -]) - -# If we can't provoke the declaration of stat64 then we assume the -# environment supports 64-bit file support naturally. Beware! 
-AC_MSG_CHECKING(whether _LARGEFILE64_SOURCE definition is required) -AC_TRY_COMPILE([ -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h>], [ -struct stat64 st64;], -sysio_largefile64_source_required=no, -sysio_largefile64_source_required=maybe) -if test x$sysio_largefile64_source_required = xmaybe; then - AC_TRY_COMPILE([ -#define _LARGEFILE64_SOURCE -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h>], [ -struct stat64 st64;], - sysio_largefile64_source_required=yes, - sysio_largefile64_source_required=no) -fi -AC_MSG_RESULT($sysio_largefile64_source_required) -if test x$sysio_largefile64_source_required = xyes; then - AC_DEFINE(_LARGEFILE64_SOURCE) -fi - -# Alpha linux defines -# -AC_MSG_CHECKING(for alpha linux) -alpha_linux_env=no -if test `expr ${machine} : "alpha"` = 5 && \ - test `expr ${os} : "linux"` = 5; then - alpha_linux_env=yes - AC_DEFINE(ALPHA_LINUX) -fi -AC_MSG_RESULT($alpha_linux_env) -AM_CONDITIONAL(TEST_ALPHA_ARG, test x$alpha_linux_env = xyes) - -# Check for __st_ino -# -AC_MSG_CHECKING(for __st_ino) -AC_TRY_COMPILE([ -#include <sys/stat.h>], -[struct stat st; -st.__st_ino = 0;], - have__st_ino=yes, - have__st_ino=no) -AC_MSG_RESULT($have__st_ino) -if test x$have__st_ino = xyes; then - AC_DEFINE(HAVE__ST_INO) -fi - -# Check for st_gen -# -AC_MSG_CHECKING(for st_gen) -AC_TRY_COMPILE([ -#include <sys/stat.h>], -[struct stat st; -st.st_gen = 0;], - have_st_gen=yes, - have_st_gen=no) -AC_MSG_RESULT($have_st_gen) -if test x$have_st_gen = xyes; then - AC_DEFINE(HAVE_GENERATION) -fi - -AC_MSG_CHECKING(whether .text pseudo-op must be used) -AC_CACHE_VAL(sysio_asm_dot_text, [dnl -cat > conftest.s <<EOF - .text -EOF - sysio_asm_dot_text= - if ${CC-cc} $CFLAGS -c conftest.s 2>/dev/null; then - sysio_asm_dot_text=.text - fi - rm -f conftest*]) -if test -z "$sysio_dot_text"; then - AC_MSG_RESULT(no) -else - AC_MSG_RESULT(yes) -fi - -AC_CACHE_CHECK(for assembler global-symbol directive, - sysio_asm_global_directive, [dnl 
-sysio_asm_global_directive=UNKNOWN -for ac_globl in .globl .global .EXPORT; do - cat > conftest.s <<EOF - ${sysio_asm_dot_text} - ${ac_globl} foo -foo: -EOF - if ${CC-cc} $CFLAGS -c conftest.s 2>/dev/null; then - sysio_asm_global_directive=${ac_globl} - fi - rm -f conftest* - test $sysio_asm_global_directive != UNKNOWN && break -done]) -if test $sysio_asm_global_directive = UNKNOWN; then - AC_MSG_ERROR(cannot determine asm global directive) -#else -# AC_DEFINE_UNQUOTED(ASM_GLOBAL_DIRECTIVE, ${sysio_asm_global_directive}) -fi - -AC_CACHE_CHECK(for .set assembler directive, - sysio_asm_set_directive, [dnl -cat > conftest.s<<EOF -${sysio_asm_dot_text} -foo: -.set bar, foo -${sysio_asm_global_directive} bar -EOF - # The alpha-dec-osf1 assembler gives only a warning for `.set' - # (but it doesn't work), so we must do a linking check to be sure. -cat > conftest1.c <<EOF -extern int bar; -main () { printf ("%d\n", bar); } -EOF - if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS \ - -o conftest conftest.s conftest1.c 1>&AC_FD_CC 2>&AC_FD_CC; then - sysio_asm_set_directive=yes - else - sysio_asm_set_directive=no - fi - rm -f conftest*]) -#if test $sysio_asm_set_directive = yes; then -# AC_DEFINE(HAVE_ASM_SET_DIRECTIVE) -#fi - -AC_CACHE_CHECK(for assembler .weak directive, sysio_asm_weak_directive, - [dnl -cat > conftest.s <<EOF -${sysio_dot_text} -foo: -.weak foo -EOF - if ${CC-cc} $CFLAGS -c conftest.s 2>/dev/null; then - sysio_asm_weak_directive=yes - else - sysio_asm_weak_directive=no - fi - rm -f conftest*]) - -if test $sysio_asm_weak_directive = no; then - AC_CACHE_CHECK(for assembler .weakext directive, - sysio_asm_weakext_directive, [dnl -cat > conftest.s <<EOF -${sysio_dot_text} -${sysio_asm_global_directive} foo -foo: -.weakext bar foo -.weakext baz -${sysio_asm_global_directive} baz -baz: -EOF - if ${CC-cc} $CFLAGS -c conftest.s 2>/dev/null; then - sysio_asm_weakext_directive=yes - else - sysio_asm_weakext_directive=no - fi - rm -f conftest*]) -fi # no .weak - -if test 
x$sysio_asm_weak_directive = xyes; then - AC_DEFINE(HAVE_ASM_WEAK_DIRECTIVE) -fi -if test x$sysio_asm_weakext_directive = xyes; then - AC_DEFINE(HAVE_ASM_WEAKEXT_DIRECTIVE) -fi - -AC_OUTPUT( - Makefile - tests/Makefile) - diff --git a/libsysio/depcomp b/libsysio/depcomp deleted file mode 100755 index edb5d38efdb2f5cb274a8c493fa5ed776f06ae22..0000000000000000000000000000000000000000 --- a/libsysio/depcomp +++ /dev/null @@ -1,479 +0,0 @@ -#! /bin/sh - -# depcomp - compile a program generating dependencies as side-effects -# Copyright 1999, 2000, 2003 Free Software Foundation, Inc. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>. - -if test -z "$depmode" || test -z "$source" || test -z "$object"; then - echo "depcomp: Variables source, object and depmode must be set" 1>&2 - exit 1 -fi -# `libtool' can also be set to `yes' or `no'. 
- -if test -z "$depfile"; then - base=`echo "$object" | sed -e 's,^.*/,,' -e 's,\.\([^.]*\)$,.P\1,'` - dir=`echo "$object" | sed 's,/.*$,/,'` - if test "$dir" = "$object"; then - dir= - fi - # FIXME: should be _deps on DOS. - depfile="$dir.deps/$base" -fi - -tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} - -rm -f "$tmpdepfile" - -# Some modes work just like other modes, but use different flags. We -# parameterize here, but still list the modes in the big case below, -# to make depend.m4 easier to write. Note that we *cannot* use a case -# here, because this file can only contain one case statement. -if test "$depmode" = hp; then - # HP compiler uses -M and no extra arg. - gccflag=-M - depmode=gcc -fi - -if test "$depmode" = dashXmstdout; then - # This is just like dashmstdout with a different argument. - dashmflag=-xM - depmode=dashmstdout -fi - -case "$depmode" in -gcc3) -## gcc 3 implements dependency tracking that does exactly what -## we want. Yay! Note: for some reason libtool 1.4 doesn't like -## it if -MD -MP comes after the -MF stuff. Hmm. - "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - mv "$tmpdepfile" "$depfile" - ;; - -gcc) -## There are various ways to get dependency output from gcc. Here's -## why we pick this rather obscure method: -## - Don't want to use -MD because we'd like the dependencies to end -## up in a subdir. Having to rename by hand is ugly. -## (We might end up doing this anyway to support other compilers.) -## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like -## -MM, not -M (despite what the docs say). -## - Using -M directly means running the compiler twice (even worse -## than renaming). - if test -z "$gccflag"; then - gccflag=-MD, - fi - "$@" -Wp,"$gccflag$tmpdepfile" - stat=$? 
- if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - echo "$object : \\" > "$depfile" - alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz -## The second -e expression handles DOS-style file names with drive letters. - sed -e 's/^[^:]*: / /' \ - -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" -## This next piece of magic avoids the `deleted header file' problem. -## The problem is that when a header file which appears in a .P file -## is deleted, the dependency causes make to die (because there is -## typically no way to rebuild the header). We avoid this by adding -## dummy dependencies for each header file. Too bad gcc doesn't do -## this for us directly. - tr ' ' ' -' < "$tmpdepfile" | -## Some versions of gcc put a space before the `:'. On the theory -## that the space means something, we add a space to the output as -## well. -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -hp) - # This case exists only to let depend.m4 do its work. It works by - # looking at the text of this script. This case will never be run, - # since it is checked for above. - exit 1 - ;; - -sgi) - if test "$libtool" = yes; then - "$@" "-Wp,-MDupdate,$tmpdepfile" - else - "$@" -MDupdate "$tmpdepfile" - fi - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - - if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files - echo "$object : \\" > "$depfile" - - # Clip off the initial element (the dependent). Don't try to be - # clever and replace this with sed code, as IRIX sed won't handle - # lines with more than a fixed number of characters (4096 in - # IRIX 6.2 sed, 8192 in IRIX 6.5). 
We also remove comment lines; - # the IRIX cc adds comments like `#:fec' to the end of the - # dependency line. - tr ' ' ' -' < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ - tr ' -' ' ' >> $depfile - echo >> $depfile - - # The second pass generates a dummy entry for each header file. - tr ' ' ' -' < "$tmpdepfile" \ - | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ - >> $depfile - else - # The sourcefile does not contain any dependencies, so just - # store a dummy comment line, to avoid errors with the Makefile - # "include basename.Plo" scheme. - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -aix) - # The C for AIX Compiler uses -M and outputs the dependencies - # in a .u file. In older versions, this file always lives in the - # current directory. Also, the AIX compiler puts `$object:' at the - # start of each line; $object doesn't have directory information. - # Version 6 uses the directory in both cases. - stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` - tmpdepfile="$stripped.u" - if test "$libtool" = yes; then - "$@" -Wc,-M - else - "$@" -M - fi - stat=$? - - if test -f "$tmpdepfile"; then : - else - stripped=`echo "$stripped" | sed 's,^.*/,,'` - tmpdepfile="$stripped.u" - fi - - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - - if test -f "$tmpdepfile"; then - outname="$stripped.o" - # Each line is of the form `foo.o: dependent.h'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" - sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" - else - # The sourcefile does not contain any dependencies, so just - # store a dummy comment line, to avoid errors with the Makefile - # "include basename.Plo" scheme. - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -icc) - # Intel's C compiler understands `-MD -MF file'. 
However on - # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c - # ICC 7.0 will fill foo.d with something like - # foo.o: sub/foo.c - # foo.o: sub/foo.h - # which is wrong. We want: - # sub/foo.o: sub/foo.c - # sub/foo.o: sub/foo.h - # sub/foo.c: - # sub/foo.h: - # ICC 7.1 will output - # foo.o: sub/foo.c sub/foo.h - # and will wrap long lines using \ : - # foo.o: sub/foo.c ... \ - # sub/foo.h ... \ - # ... - - "$@" -MD -MF "$tmpdepfile" - stat=$? - if test $stat -eq 0; then : - else - rm -f "$tmpdepfile" - exit $stat - fi - rm -f "$depfile" - # Each line is of the form `foo.o: dependent.h', - # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. - # Do two passes, one to just change these to - # `$object: dependent.h' and one to simply `dependent.h:'. - sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" - # Some versions of the HPUX 10.20 sed can't process this invocation - # correctly. Breaking it into two sed invocations is a workaround. - sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | - sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -tru64) - # The Tru64 compiler uses -MD to generate dependencies as a side - # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. - # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put - # dependencies in `foo.d' instead, so we check for that too. - # Subdirectories are respected. - dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` - test "x$dir" = "x$object" && dir= - base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` - - if test "$libtool" = yes; then - tmpdepfile1="$dir.libs/$base.lo.d" - tmpdepfile2="$dir.libs/$base.d" - "$@" -Wc,-MD - else - tmpdepfile1="$dir$base.o.d" - tmpdepfile2="$dir$base.d" - "$@" -MD - fi - - stat=$? 
- if test $stat -eq 0; then : - else - rm -f "$tmpdepfile1" "$tmpdepfile2" - exit $stat - fi - - if test -f "$tmpdepfile1"; then - tmpdepfile="$tmpdepfile1" - else - tmpdepfile="$tmpdepfile2" - fi - if test -f "$tmpdepfile"; then - sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" - # That's a tab and a space in the []. - sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" - else - echo "#dummy" > "$depfile" - fi - rm -f "$tmpdepfile" - ;; - -#nosideeffect) - # This comment above is used by automake to tell side-effect - # dependency tracking mechanisms from slower ones. - -dashmstdout) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - - # Remove `-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - test -z "$dashmflag" && dashmflag=-M - # Require at least two characters before searching for `:' - # in the target name. This is to cope with DOS-style filenames: - # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. - "$@" $dashmflag | - sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - tr ' ' ' -' < "$tmpdepfile" | \ -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -dashXmstdout) - # This case only exists to satisfy depend.m4. It is never actually - # run, as this mode is specially recognized in the preamble. - exit 1 - ;; - -makedepend) - "$@" || exit $? 
- # Remove any Libtool call - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - # X makedepend - shift - cleared=no - for arg in "$@"; do - case $cleared in - no) - set ""; shift - cleared=yes ;; - esac - case "$arg" in - -D*|-I*) - set fnord "$@" "$arg"; shift ;; - # Strip any option that makedepend may not understand. Remove - # the object too, otherwise makedepend will parse it as a source file. - -*|$object) - ;; - *) - set fnord "$@" "$arg"; shift ;; - esac - done - obj_suffix="`echo $object | sed 's/^.*\././'`" - touch "$tmpdepfile" - ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" - rm -f "$depfile" - cat < "$tmpdepfile" > "$depfile" - sed '1,2d' "$tmpdepfile" | tr ' ' ' -' | \ -## Some versions of the HPUX 10.20 sed can't process this invocation -## correctly. Breaking it into two sed invocations is a workaround. - sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" "$tmpdepfile".bak - ;; - -cpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout. - "$@" || exit $? - - # Remove the call to Libtool. - if test "$libtool" = yes; then - while test $1 != '--mode=compile'; do - shift - done - shift - fi - - # Remove `-o $object'. - IFS=" " - for arg - do - case $arg in - -o) - shift - ;; - $object) - shift - ;; - *) - set fnord "$@" "$arg" - shift # fnord - shift # $arg - ;; - esac - done - - "$@" -E | - sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | - sed '$ s: \\$::' > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - cat < "$tmpdepfile" >> "$depfile" - sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -msvisualcpp) - # Important note: in order to support this mode, a compiler *must* - # always write the preprocessed file to stdout, regardless of -o, - # because we must use -o when running libtool. - "$@" || exit $? 
- IFS=" " - for arg - do - case "$arg" in - "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") - set fnord "$@" - shift - shift - ;; - *) - set fnord "$@" "$arg" - shift - shift - ;; - esac - done - "$@" -E | - sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" - rm -f "$depfile" - echo "$object : \\" > "$depfile" - . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" - echo " " >> "$depfile" - . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" - rm -f "$tmpdepfile" - ;; - -none) - exec "$@" - ;; - -*) - echo "Unknown depmode $depmode" 1>&2 - exit 1 - ;; -esac - -exit 0 diff --git a/libsysio/dev/stdfd/.cvsignore b/libsysio/dev/stdfd/.cvsignore deleted file mode 100644 index ec96903b9d05c45b7fb9e6f057c456661be09b81..0000000000000000000000000000000000000000 --- a/libsysio/dev/stdfd/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.deps -.dirstamp diff --git a/libsysio/dev/stdfd/module.mk b/libsysio/dev/stdfd/module.mk deleted file mode 100644 index ad034fb86aaeef55df3964e3ce17cad988754e8b..0000000000000000000000000000000000000000 --- a/libsysio/dev/stdfd/module.mk +++ /dev/null @@ -1,2 +0,0 @@ -STDFD_SRCS = dev/stdfd/stdfd.c -STDFD_EXTRA = dev/stdfd/stdfd.h dev/stdfd/module.mk diff --git a/libsysio/dev/stdfd/stdfd.c b/libsysio/dev/stdfd/stdfd.c deleted file mode 100644 index 6ea426cd2b50ea43f5cfe7ddd10534c5684c02fc..0000000000000000000000000000000000000000 --- a/libsysio/dev/stdfd/stdfd.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. 
- * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifdef __linux__ -#define _BSD_SOURCE -#endif - -#include <errno.h> -#include <stdarg.h> -#include <sys/syscall.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" -#include "native.h" -#include "inode.h" -#include "dev.h" - -#include "stdfd.h" - -#ifdef CPLANT_YOD -#include <sys/statfs.h> -#include "cplant-yod.h" -#define dowrite(f, b, n) write_yod(f, b, n) -#define doread(f, b, n) read_yod(f, b, n) -#else -#define dowrite(f, b, n) syscall(SYSIO_SYS_write, f, b, n) -#define doread(f, b, n) syscall(SYSIO_SYS_read, f, b, n) -#endif - -/* - * Pre-opened standard file descriptors driver. 
- */ - -static int stdfd_open(struct pnode *pno, int flags, mode_t mode); -static int stdfd_close(struct inode *ino); -static int stdfd_read(struct inode *ino, struct ioctx *ioctx); -static int stdfd_write(struct inode *ino, struct ioctx *ioctx); -static int stdfd_iodone(struct ioctx *ioctx); -static int stdfd_datasync(struct inode *ino); -static int stdfd_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn); -static int stdfd_ioctl(struct inode *ino, - unsigned long int request, - va_list ap); - -int -_sysio_stdfd_init() -{ - struct inode_ops stdfd_operations; - - stdfd_operations = _sysio_nodev_ops; - stdfd_operations.inop_open = stdfd_open; - stdfd_operations.inop_close = stdfd_close; - stdfd_operations.inop_read = stdfd_read; - stdfd_operations.inop_write = stdfd_write; - stdfd_operations.inop_iodone = stdfd_iodone; - stdfd_operations.inop_fcntl = stdfd_fcntl; - stdfd_operations.inop_datasync = stdfd_datasync; - stdfd_operations.inop_ioctl = stdfd_ioctl; - - return _sysio_char_dev_register(SYSIO_C_STDFD_MAJOR, - "stdfd", - &stdfd_operations); -} - -static int -stdfd_open(struct pnode *pno __IS_UNUSED, - int flags __IS_UNUSED, - mode_t mode __IS_UNUSED) -{ - - return 0; -} - -static int -stdfd_close(struct inode *ino __IS_UNUSED) -{ - - return 0; -} - -static int -doio(ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, struct inode *), - struct inode *ino, - struct ioctx *ioctx) -{ - - if (ioctx->ioctx_xtvlen != 1) { - /* - * No scatter/gather to "file" address space (we're not - * seekable) and "nowhere" makes no sense. 
- */ - return -EINVAL; - } - ioctx->ioctx_cc = - _sysio_doio(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen, - ioctx->ioctx_iov, ioctx->ioctx_iovlen, - (ssize_t (*)(void *, size_t, _SYSIO_OFF_T, void *))f, - ino); - if (ioctx->ioctx_cc < 0) { - ioctx->ioctx_errno = -ioctx->ioctx_cc; - ioctx->ioctx_cc = -1; - } - return 0; -} - -static ssize_t -stdfd_read_simple(void *buf, - size_t nbytes, - _SYSIO_OFF_T off __IS_UNUSED, - struct inode *ino) -{ - int fd = SYSIO_MINOR_DEV(ino->i_stbuf.st_rdev); - int cc; - - cc = doread(fd, buf, nbytes); - if (cc < 0) - cc = -errno; - return cc; -} - -static int -stdfd_read(struct inode *ino, struct ioctx *ioctx) -{ - - return doio(stdfd_read_simple, ino, ioctx); -} - -static ssize_t -stdfd_write_simple(const void *buf, - size_t nbytes, - _SYSIO_OFF_T off __IS_UNUSED, - struct inode *ino) -{ - int fd = SYSIO_MINOR_DEV(ino->i_stbuf.st_rdev); - int cc; - - cc = dowrite(fd, buf, nbytes); - if (cc < 0) - cc = -errno; - return cc; -} - -static int -stdfd_write(struct inode *ino, struct ioctx *ioctx) -{ - - return doio((ssize_t (*)(void *, - size_t, - _SYSIO_OFF_T, - struct inode *))stdfd_write_simple, - ino, - ioctx); -} - -static int -stdfd_iodone(struct ioctx *iocp __IS_UNUSED) -{ - - /* - * It's always done in this driver. It completed when posted. - */ - return 1; -} - -static int -stdfd_fcntl(struct inode *ino, - int cmd, - va_list ap, - int *rtn) -{ - int err; - int fd = SYSIO_MINOR_DEV(ino->i_stbuf.st_rdev); - long arg; - - err = 0; - switch (cmd) { - case F_GETFL: - *rtn = syscall(SYS_fcntl, fd, cmd); - if (*rtn == -1) - err = -errno; - break; - case F_SETFL: - arg = va_arg(ap, long); - *rtn = syscall(SYS_fcntl, fd, cmd, arg); - if (*rtn == -1) - err = -errno; - va_end(ap); - break; - default: - *rtn = -1; - err = -EINVAL; - } - return err; -} - -static int -stdfd_datasync(struct inode *ino __IS_UNUSED) -{ - - /* - * We don't buffer, so nothing to do. 
- */ - return 0; -} - -static int -stdfd_ioctl(struct inode *ino __IS_UNUSED, - unsigned long int request __IS_UNUSED, - va_list ap __IS_UNUSED) -{ - - return -ENOTTY; -} diff --git a/libsysio/dev/stdfd/stdfd.h b/libsysio/dev/stdfd/stdfd.h deleted file mode 100644 index 3bac7c19b07fbaa4cd3b31c2643a62f7c06036ea..0000000000000000000000000000000000000000 --- a/libsysio/dev/stdfd/stdfd.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Std{in,out,err} pseudo-device-driver support. - */ - -#define SYSIO_C_STDFD_MAJOR 0 - -extern int _sysio_stdfd_init(void); diff --git a/libsysio/drivers/incore/.cvsignore b/libsysio/drivers/incore/.cvsignore deleted file mode 100644 index ec96903b9d05c45b7fb9e6f057c456661be09b81..0000000000000000000000000000000000000000 --- a/libsysio/drivers/incore/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.deps -.dirstamp diff --git a/libsysio/drivers/incore/README b/libsysio/drivers/incore/README deleted file mode 100644 index 2f8c4b8d951706eb8a397ea16794ce3613ff86bb..0000000000000000000000000000000000000000 --- a/libsysio/drivers/incore/README +++ /dev/null @@ -1,27 +0,0 @@ -This "incore" file system driver is a self-contained file system. It does -not use any resource external to the node. - -It is primarily intended for enabling an efficient compute-node bootstrap. It -might also be useful for a very small scratch file system, holding device -files, and the like. - -The root directory i-node is manufactured on the fly. The source specification -for the mount() call should be something like: - - <perms>+<uid>+<gid> - -Where: - <perms> are the directory permissions masked by 0777 - Note -- no umask is applied. - <uid> should be the owner's uid - <gid> should be the owner's gid - -Most operations are supported, with the notable exception of symbolic -links. - -In the implementation, the driver is really set up to export most -useful symbols without polluting the name space or contending with -other public symbols. However, the symbols are not yet exported. If -we ever require a proc-fs style file system, this could be very useful -provided a little extra work is done to allow other drivers to overload -some operations. Particularly the file ops, I would think. 
diff --git a/libsysio/drivers/incore/fs_incore.c b/libsysio/drivers/incore/fs_incore.c deleted file mode 100644 index c2ce43369faeea90cf8cfe7d4f4d1ff26f9f12c4..0000000000000000000000000000000000000000 --- a/libsysio/drivers/incore/fs_incore.c +++ /dev/null @@ -1,1723 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifdef __linux__ -#define _BSD_SOURCE -#endif - -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <time.h> -#include <limits.h> -#include <errno.h> -#include <assert.h> -#include <sys/uio.h> -#include <sys/types.h> -#include <dirent.h> -#include <sys/stat.h> -#ifdef _HAVE_STATVFS -#include <sys/statvfs.h> -#endif -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" -#include "fs.h" -#include "mount.h" -#include "inode.h" -#include "dev.h" - -#include "fs_incore.h" - - -/* - * In-core file system pseudo-driver. - */ - -/* - * Pseudo-blocksize. - */ -#define INCORE_BLKSIZE (8192) - -/* - * Format of an incore inode. - */ -struct incore_inode { - LIST_ENTRY(incore_inode) ici_link; /* i-nodes list link */ - struct intnl_stat ici_st; /* attrs */ - struct file_identifier ici_fileid; /* file ID */ - void *ici_data; /* file data */ -}; - -/* - * Given pointer to inode, return pointer to incore-inode. - */ -#define I2IC(ino) ((struct incore_inode *)(ino)->i_private) - -struct incore_filesys { - LIST_HEAD(, incore_inode) icfs_icinodes; /* all i-nodes list */ -}; - -/* - * Given pointer to filesys, return pointer to incore-filesys. 
- */ -#define FS2ICFS(fs) ((struct incore_filesys *)(fs)->fs_private) - -static int _sysio_incore_fsswop_mount(const char *source, - unsigned flags, - const void *data, - struct pnode *tocover, - struct mount **mntp); - -static struct fssw_ops incore_fssw_ops = { - _sysio_incore_fsswop_mount -}; - -static void _sysio_incore_fsop_gone(struct filesys *fs); - -static struct filesys_ops incore_fs_ops = { - _sysio_incore_fsop_gone, -}; - -static int _sysio_incore_dirop_lookup(struct pnode *pno, - struct inode **inop, - struct intent *intnt, - const char *path); -static int _sysio_incore_inop_getattr(struct pnode *pno, - struct inode *ino, - struct intnl_stat *stbuf); -static int _sysio_incore_inop_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf); -static ssize_t _sysio_incore_dirop_filldirentries(struct inode *ino, - _SYSIO_OFF_T *posp, - char *buf, - size_t nbytes); -static int _sysio_incore_dirop_mkdir(struct pnode *pno, mode_t mode); -static int _sysio_incore_dirop_rmdir(struct pnode *pno); -static int _sysio_incore_inop_open(struct pnode *pno, int flags, mode_t mode); -static int _sysio_incore_inop_close(struct inode *ino); -static int _sysio_incore_dirop_link(struct pnode *old, struct pnode *new); -static int _sysio_incore_dirop_unlink(struct pnode *pno); -static int _sysio_incore_dirop_rename(struct pnode *old, struct pnode *new); -static int _sysio_incore_filop_read(struct inode *ino, struct ioctx *ioctx); -static int _sysio_incore_filop_write(struct inode *ino, struct ioctx *ioctx); -static _SYSIO_OFF_T _sysio_incore_filop_pos(struct inode *ino, - _SYSIO_OFF_T off); -static int _sysio_incore_filop_iodone(struct ioctx *ioctx); -static int _sysio_incore_filop_fcntl(struct inode *ino, - int cmd, va_list ap, int *rtn); -static int _sysio_incore_inop_sync(struct inode *ino); -static int _sysio_incore_filop_ioctl(struct inode *ino, - unsigned long int request, - va_list ap); -static int _sysio_incore_dirop_mknod(struct pnode 
*pno, mode_t mode, dev_t dev); -#ifdef _HAVE_STATVFS -static int _sysio_incore_inop_statvfs(struct pnode *pno, - struct inode *ino, - struct intnl_statvfs *buf); -#endif -static void _sysio_incore_inop_gone(struct inode *ino); - -#define _sysio_incore_dirop_symlink \ - (int (*)(struct pnode *, const char *))_sysio_do_enosys -#define _sysio_incore_dirop_readlink \ - (int (*)(struct pnode *, char *, size_t))_sysio_do_enosys -#define _sysio_incore_dirop_read \ - (int (*)(struct inode *, \ - struct ioctx *))_sysio_do_eisdir -#define _sysio_incore_dirop_write \ - (int (*)(struct inode *, \ - struct ioctx *))_sysio_do_eisdir -#define _sysio_incore_dirop_pos \ - (_SYSIO_OFF_T (*)(struct inode *, \ - _SYSIO_OFF_T))_sysio_do_eisdir -#define _sysio_incore_dirop_iodone \ - (int (*)(struct ioctx *))_sysio_do_illop -#define _sysio_incore_dirop_fcntl \ - (int (*)(struct inode *, int, va_list, int *))_sysio_do_eisdir -#define _sysio_incore_dirop_ioctl \ - (int (*)(struct inode *, \ - unsigned long int, \ - va_list))_sysio_do_eisdir - -static struct inode_ops _sysio_incore_dir_ops = { - _sysio_incore_dirop_lookup, - _sysio_incore_inop_getattr, - _sysio_incore_inop_setattr, - _sysio_incore_dirop_filldirentries, - _sysio_incore_dirop_mkdir, - _sysio_incore_dirop_rmdir, - _sysio_incore_dirop_symlink, - _sysio_incore_dirop_readlink, - _sysio_incore_inop_open, - _sysio_incore_inop_close, - _sysio_incore_dirop_link, - _sysio_incore_dirop_unlink, - _sysio_incore_dirop_rename, - _sysio_incore_dirop_read, - _sysio_incore_dirop_write, - _sysio_incore_dirop_pos, - _sysio_incore_dirop_iodone, - _sysio_incore_dirop_fcntl, - _sysio_incore_inop_sync, - _sysio_incore_inop_sync, - _sysio_incore_dirop_ioctl, - _sysio_incore_dirop_mknod, -#ifdef _HAVE_STATVFS - _sysio_incore_inop_statvfs, -#endif - _sysio_incore_inop_gone -}; - -#define _sysio_incore_filop_lookup \ - (int (*)(struct pnode *, \ - struct inode **, \ - struct intent *, \ - const char *))_sysio_do_illop -#define 
_sysio_incore_filop_filldirentries \ - (ssize_t (*)(struct inode *, \ - _SYSIO_OFF_T *, \ - char *, \ - size_t))_sysio_do_illop -#define _sysio_incore_filop_mkdir \ - (int (*)(struct pnode *, mode_t))_sysio_do_illop -#define _sysio_incore_filop_rmdir \ - (int (*)(struct pnode *))_sysio_do_illop -#define _sysio_incore_filop_symlink \ - (int (*)(struct pnode *, const char *))_sysio_do_illop -#define _sysio_incore_symlinkop_readlink \ - (int (*)(struct pnode *, char *, size_t))_sysio_do_illop -#define _sysio_incore_filop_link \ - (int (*)(struct pnode *old, struct pnode *new))_sysio_do_illop -#define _sysio_incore_filop_unlink \ - (int (*)(struct pnode *pno))_sysio_do_illop -#define _sysio_incore_filop_rename \ - (int (*)(struct pnode *old, struct pnode *new))_sysio_do_illop -#define _sysio_incore_filop_mknod \ - (int (*)(struct pnode *pno, mode_t, dev_t))_sysio_do_illop - -static struct inode_ops _sysio_incore_file_ops = { - _sysio_incore_filop_lookup, - _sysio_incore_inop_getattr, - _sysio_incore_inop_setattr, - _sysio_incore_filop_filldirentries, - _sysio_incore_filop_mkdir, - _sysio_incore_filop_rmdir, - _sysio_incore_filop_symlink, - _sysio_incore_symlinkop_readlink, - _sysio_incore_inop_open, - _sysio_incore_inop_close, - _sysio_incore_filop_link, - _sysio_incore_filop_unlink, - _sysio_incore_filop_rename, - _sysio_incore_filop_read, - _sysio_incore_filop_write, - _sysio_incore_filop_pos, - _sysio_incore_filop_iodone, - _sysio_incore_filop_fcntl, - _sysio_incore_inop_sync, - _sysio_incore_inop_sync, - _sysio_incore_filop_ioctl, - _sysio_incore_filop_mknod, -#ifdef _HAVE_STATVFS - _sysio_incore_inop_statvfs, -#endif - _sysio_incore_inop_gone -}; - -static struct inode_ops _sysio_incore_dev_ops = { - _sysio_incore_filop_lookup, - _sysio_incore_inop_getattr, - _sysio_incore_inop_setattr, - _sysio_incore_filop_filldirentries, - _sysio_incore_filop_mkdir, - _sysio_incore_filop_rmdir, - _sysio_incore_filop_symlink, - _sysio_incore_symlinkop_readlink, - 
_sysio_nodev_inop_open, - _sysio_nodev_inop_close, - _sysio_incore_filop_link, - _sysio_incore_filop_unlink, - _sysio_incore_filop_rename, - _sysio_nodev_inop_read, - _sysio_nodev_inop_write, - _sysio_nodev_inop_pos, - _sysio_nodev_inop_iodone, - _sysio_incore_filop_fcntl, - _sysio_incore_inop_sync, - _sysio_nodev_inop_sync, - _sysio_nodev_inop_ioctl, - _sysio_incore_filop_mknod, -#ifdef _HAVE_STATVFS - _sysio_incore_inop_statvfs, -#endif - _sysio_incore_inop_gone -}; - -typedef void *(*probe_ty)(void *data, size_t len, void *arg); - -/* - * Lookup data argument bundle record. - */ -struct lookup_data { - struct qstr *name; /* desired entry name */ - struct intnl_dirent *de; /* last dirent */ - size_t minsiz; /* min hole needed */ - struct { - void *p; /* best hole */ - size_t len; /* best hole len */ - } hole; -}; - -/* - * Initialize lookup data argument bundle. - */ -#define INCORE_LD_INIT(ld, minsz, qs) \ - do { \ - (ld)->name = (qs); \ - (ld)->de = NULL; \ - (ld)->minsiz = (minsz); \ - (ld)->hole.p = NULL; \ - (ld)->hole.len = 0; \ - } while (0) - -/* - * Calculate size of a directory entry given length of the entry name. - */ -#define INCORE_D_RECLEN(namlen) \ - (((size_t )&((struct intnl_dirent *)0)->d_name + \ - (namlen) + 1 + sizeof(void *)) & \ - ~(sizeof(void *) - 1)) - -/* - * Given mode bits, return directory entry type code. - */ -#define INCORE_D_TYPEOF(m) (((m) & S_IFMT) >> 12) - -static char incore_dir_template[INCORE_D_RECLEN(1) + INCORE_D_RECLEN(2)]; -#if 0 -static struct intnl_dirent incore_dir_template[] = { - { - 0, - INCORE_D_RECLEN(1), - INCORE_D_RECLEN(1), - INCORE_D_TYPEOF(S_IFDIR), - { '.', '\0' } - }, - { - 0, - INCORE_D_RECLEN(1) + INCORE_D_RECLEN(2), - INCORE_D_RECLEN(2), - INCORE_D_TYPEOF(S_IFDIR), - { '.', '.', '\0' } - } -}; -#endif - -/* - * Initialize this driver. - */ -int -_sysio_incore_init() -{ - struct intnl_dirent *de; - off_t off; - - /* - * Fill in the directory template. 
- */ - de = (struct intnl_dirent *)incore_dir_template; -#ifdef _DIRENT_HAVE_D_OFF - de->d_off = -#endif - off = de->d_reclen = INCORE_D_RECLEN(1); - de->d_type = INCORE_D_TYPEOF(S_IFDIR); - de->d_name[0] = '.'; -#ifdef _DIRENT_HAVE_D_NAMLEN - de->d_namlen = 1; -#endif - /* - * Move to entry for `..' - */ - de = (struct intnl_dirent *)((char *)de + off); - de->d_reclen = INCORE_D_RECLEN(2); -#ifdef _DIRENT_HAVE_D_NAMLEN - de->d_namlen = 2; -#endif -#ifdef _DIRENT_HAVE_D_OFF - de->d_off = -#endif - off += de->d_reclen; - de->d_type = INCORE_D_TYPEOF(S_IFDIR); - de->d_name[0] = de->d_name[1] = '.'; - de->d_name[2] = ' '; - - return _sysio_fssw_register("incore", &incore_fssw_ops); -} - -static ino_t -incore_inum_alloc() -{ - static ino_t nxtnum = 1; - - assert(nxtnum); - return nxtnum++; -} - -static struct incore_inode * -incore_i_alloc(struct incore_filesys *icfs, struct intnl_stat *st) -{ - struct incore_inode *icino; - - assert(st->st_ino); - assert(!st->st_size); - - icino = malloc(sizeof(struct incore_inode)); - if (!icino) - return NULL; - icino->ici_st = *st; - icino->ici_fileid.fid_data = &icino->ici_st.st_ino; - icino->ici_fileid.fid_len = sizeof(icino->ici_st.st_ino); - icino->ici_data = NULL; - - LIST_INSERT_HEAD(&icfs->icfs_icinodes, icino, ici_link); - - return icino; -} - -static int -incore_trunc(struct incore_inode *icino, _SYSIO_OFF_T size, int clear) -{ - _SYSIO_OFF_T n; - void *p; - - if (size < 0) - return -EINVAL; - n = size; - if (!size) { - if (icino->ici_data) { - free(icino->ici_data); - icino->ici_data = NULL; - } - n = 0; - goto out; - } - p = realloc(icino->ici_data, (size_t )n); - if (!p) - return -ENOSPC; - icino->ici_data = p; - if (clear && n > icino->ici_st.st_size) - (void )memset((char *)icino->ici_data + icino->ici_st.st_size, - 0, - (size_t )(n - icino->ici_st.st_size)); -out: - icino->ici_st.st_size = n; - icino->ici_st.st_blocks = - (n + icino->ici_st.st_blksize - 1) / icino->ici_st.st_blksize; - icino->ici_st.st_mtime = 
time(NULL); - return 0; -} - -static void -incore_i_destroy(struct incore_inode *icino) -{ - - LIST_REMOVE(icino, ici_link); - (void )incore_trunc(icino, 0, 0); - free(icino); -} - -static struct incore_inode * -incore_directory_new(struct incore_filesys *icfs, - struct incore_inode *parent, - struct intnl_stat *st) -{ - struct incore_inode *icino; - int err; - struct intnl_dirent *de; - - icino = incore_i_alloc(icfs, st); - if (!icino) - return NULL; - - if (!parent) - parent = icino; /* root */ - - /* - * Allocate and init directory data. - */ - err = incore_trunc(icino, sizeof(incore_dir_template), 1); - if (err) { - incore_i_destroy(icino); - return NULL; - } - (void )memcpy(icino->ici_data, - &incore_dir_template, - sizeof(incore_dir_template)); - de = icino->ici_data; - de->d_ino = st->st_ino; - de = - (struct intnl_dirent *)((char *)de + -#ifdef _DIRENT_HAVE_D_OFF - de->d_off -#else - de->d_reclen -#endif - ); - de->d_ino = parent->ici_st.st_ino; - - /* - * Set creation time to modify time set by truncate. - */ - st->st_ctime = st->st_mtime; - - return icino; -} - -static int -_sysio_incore_fsswop_mount(const char *source, - unsigned flags, - const void *data __IS_UNUSED, - struct pnode *tocover, - struct mount **mntp) -{ - char *cp; - unsigned long ul; - long l; - mode_t mode; - uid_t uid; - gid_t gid; - int err; - dev_t dev; - struct intnl_stat stat; - struct incore_filesys *icfs; - ino_t inum; - struct incore_inode *icino; - struct filesys *fs; - struct inode *rooti; - struct pnode_base *rootpb; - struct mount *mnt; - static struct qstr noname = { NULL, 0, 0 }; - - /* - * Source is a specification for the root attributes of this - * new file system in the format: - * - * <permissions>[+<owner>][-<group>] - */ - ul = strtoul(source, &cp, 0); - mode = (mode_t )ul & 07777; - uid = getuid(); /* default */ - gid = getgid(); /* default */ - if (*cp != '\0') { - /* - * Get user and/or group. 
- */ - if (*cp != '+' || - (ul == ULONG_MAX && errno == ERANGE) || - (unsigned long)mode != ul || - mode > 07777) - return -EINVAL; - source = cp; - l = strtol(source, &cp, 0); - uid = (uid_t )l; - if (((l == LONG_MIN || l == LONG_MAX) && - errno == ERANGE) || - (long )uid != l) - return -EINVAL; - if (*cp != '+') - return -EINVAL; - source = cp; - l = strtol(source, &cp, 0); - gid = (gid_t )l; - if (((l == LONG_MIN || l == LONG_MAX) && - errno == ERANGE) || - (long )gid != l) - return -EINVAL; - if (*cp != '\0') - return -EINVAL; - } - - err = 0; - - dev = _sysio_dev_alloc(); - - mnt = NULL; - rootpb = NULL; - rooti = NULL; - fs = NULL; - icino = NULL; - icfs = NULL; - - /* - * Create new FS. - */ - icfs = malloc(sizeof(struct incore_filesys)); - if (!icfs) { - err = -ENOMEM; - goto error; - } - (void )memset(icfs, 0, sizeof(struct incore_filesys)); - LIST_INIT(&icfs->icfs_icinodes); - - /* - * Create root i-node. - */ - (void )memset(&stat, 0, sizeof(stat)); - stat.st_dev = dev; - inum = incore_inum_alloc(); -#ifdef HAVE__ST_INO - stat.__st_ino = inum; -#endif - stat.st_mode = S_IFDIR | (mode & 07777); - stat.st_nlink = 2; - stat.st_uid = uid; - stat.st_gid = gid; - stat.st_size = 0; - stat.st_blksize = INCORE_BLKSIZE; - stat.st_blocks = 0; - stat.st_ctime = stat.st_mtime = stat.st_atime = 0; - stat.st_ino = inum; - icino = incore_directory_new(icfs, NULL, &stat); - if (!icino) - return -ENOSPC; - icino->ici_st.st_atime = icino->ici_st.st_mtime; - - fs = - _sysio_fs_new(&incore_fs_ops, - (flags & MOUNT_F_RO) ? FS_F_RO : 0, - icfs); - if (!fs) { - err = -ENOMEM; - goto error; - } - - /* - * Create root for system. - * - * Persistent across remounts because we ask for immunity. 
- */ - rooti = - _sysio_i_new(fs, - &icino->ici_fileid, - &icino->ici_st, - 1, - &_sysio_incore_dir_ops, - icino); - if (!rooti) { - err = -ENOMEM; - goto error; - } - rootpb = _sysio_pb_new(&noname, NULL, rooti); - if (!rootpb) { - err = -ENOMEM; - goto error; - } - - /* - * Have path-node specified by the given source argument. Let the - * system finish the job, now. - */ - mnt = NULL; - err = - _sysio_do_mount(fs, - rootpb, - flags, - tocover, - &mnt); - if (err) - goto error; - - *mntp = mnt; - - goto out; - -error: - if (mnt && _sysio_do_unmount(mnt) != 0) - abort(); - if (rootpb) { - _sysio_pb_gone(rootpb); - rooti = NULL; - } - if (rooti) - I_RELE(rooti); - if (fs) { - FS_RELE(fs); - goto out; - } - if (icino) { - incore_i_destroy(icino); - goto out; - } - if (icfs) { - free(icfs); - goto out; - } - -out: - return err; -} - -static void -_sysio_incore_fsop_gone(struct filesys *fs) -{ - struct incore_filesys *icfs; - struct incore_inode *icino, *oicino; - - icfs = FS2ICFS(fs); - - /* - * Free up i-node resource associated with this file system. - */ - icino = icfs->icfs_icinodes.lh_first; - while (icino) { - oicino = icino; - icino = icino->ici_link.le_next; - incore_i_destroy(oicino); - } - - /* - * Free the FS record. - */ - free(icfs); -} - -/* - * A directory search engine. Various functions are carried out by - * supplying appropriate callback functions. - * - * The two arguments, entry and hole, are called, if not null, for each - * directory entry and hole, respectively. 
- */ -static void * -incore_directory_probe(void *data, - size_t siz, - _SYSIO_OFF_T origin -#ifndef _DIRENT_HAVE_D_OFF - __IS_UNUSED -#endif - , - probe_ty entry, - probe_ty hole, - void *arg) -{ - struct intnl_dirent *de; - void *p; - size_t n; - - de = data; - for (;;) { -#ifdef _DIRENT_HAVE_D_OFF - assert(de->d_off); -#else - assert(de->d_reclen); -#endif - if (entry && (p = (*entry)(de, de->d_reclen, arg))) - return p; - n = -#ifdef _DIRENT_HAVE_D_OFF - de->d_off - origin; -#else - ((void *)de - data) + de->d_reclen; -#endif - if (hole) { - p = (*hole)((void *)de, de->d_reclen, arg); - if (p) - return p; - } - if (n >= siz) - break; - de = (struct intnl_dirent *)((char *)data + n); - } - - return NULL; -} - -static struct intnl_dirent * -incore_directory_match(struct intnl_dirent *de, - size_t reclen, - struct lookup_data *ld) -{ - size_t len; - -#if defined(BSD) || defined(REDSTORM) - if (IFTODT(de->d_type) == DT_WHT) - return NULL; -#endif -#ifdef _DIRENT_HAVE_D_NAMLEN - len = de->d_namlen; -#else - { - const char *cp, *end; - - cp = de->d_name; - end = (const char *)de + reclen; - while (cp < end && *cp != '\0') - cp++; - len = cp - de->d_name; - } -#endif - if (ld->name->len == len && - strncmp(de->d_name, ld->name->name, ld->name->len) == 0) - return de; - ld->de = de; - return NULL; -} - -static int -_sysio_incore_dirop_lookup(struct pnode *pno, - struct inode **inop, - struct intent *intnt __IS_UNUSED, - const char *path __IS_UNUSED) -{ - struct inode *ino; - struct intnl_dirent *de; - struct incore_inode *icino; - struct lookup_data lookup_data; - struct file_identifier fileid; -#ifdef notdef - struct inode_ops *ops; -#endif - - /* - * Revalidate? 
- */ - if (*inop) { - icino = I2IC(*inop); - assert(icino); - (*inop)->i_stbuf = icino->ici_st; - return 0; - } - - ino = pno->p_parent->p_base->pb_ino; - icino = I2IC(ino); - INCORE_LD_INIT(&lookup_data, - ULONG_MAX, - &pno->p_base->pb_name); - de = - incore_directory_probe(icino->ici_data, - icino->ici_st.st_size, - 0, - (probe_ty )incore_directory_match, - NULL, - &lookup_data); - if (!de) - return -ENOENT; - - fileid.fid_data = &de->d_ino; - fileid.fid_len = sizeof(de->d_ino); - ino = - _sysio_i_find(ino->i_fs, &fileid); -#ifdef notdef - if (ino) - goto out; - icino->ici_fileid.fid_data = &icino->ici_st.st_ino; - icino->ici_fileid.fid_len = sizeof(icino->ici_st.st_ino); - ops = NULL; - switch (icino->ici_st.st_mode & S_IFMT) { - case S_IFDIR: - ops = &_sysio_incore_dir_ops; - break; - case S_IFREG: - ops = &_sysio_incore_file_ops; - break; - default: - break; - } - if (!ops) - abort(); - ino = - _sysio_i_new(ino->i_fs, - &icino->ici_fileid, - &icino->ici_st - 1, - ops, - icino); -#endif - if (!ino) - return -ENOMEM; - -#ifdef notdef -out: -#endif - *inop = ino; - return 0; -} - -static int -_sysio_incore_inop_getattr(struct pnode *pno, - struct inode *ino, - struct intnl_stat *stbuf) -{ - struct incore_inode *icino; - - if (!ino) - ino = pno->p_base->pb_ino; - icino = I2IC(ino); - *stbuf = icino->ici_st; - return 0; -} - -static int -_sysio_incore_inop_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf) -{ - struct incore_inode *icino; - int err; - - if (!ino) - ino = pno->p_base->pb_ino; - if (!ino) - return -EBADF; - icino = I2IC(ino); - - err = 0; - if (mask & SETATTR_LEN) { - err = incore_trunc(icino, stbuf->st_size, 1); - if (err) - goto out; - mask &= ~SETATTR_LEN; - } - if (mask & SETATTR_MODE) { - icino->ici_st.st_mode = - (icino->ici_st.st_mode & S_IFMT) | (stbuf->st_mode & 07777); - } - if (mask & SETATTR_MTIME) - icino->ici_st.st_mtime = stbuf->st_mtime; - if (mask & SETATTR_ATIME) - icino->ici_st.st_atime = 
stbuf->st_atime; - if (mask & SETATTR_UID) - icino->ici_st.st_uid = stbuf->st_uid; - if (mask & SETATTR_GID) - icino->ici_st.st_gid = stbuf->st_gid; - icino->ici_st.st_ctime = time(NULL); - - ino->i_stbuf = icino->ici_st; -out: - return err; -} - -static void * -incore_directory_position(struct intnl_dirent *de, - size_t reclen __IS_UNUSED, - void *p) -{ - - return (void *)de >= p ? de : NULL; -} - -struct copy_info { - void *data; - size_t nbytes; - unsigned count; -}; - -/* - * Eumeration callback. - * - * Note: - * Whiteout entries are never returned. - */ -static void * -incore_directory_enumerate(struct intnl_dirent *de, - size_t reclen, - struct copy_info *cinfo) { - -#ifdef DT_WHT - if (de->d_type == DT_WHT) { - /* - * Keep going but skip the copy. - */ - return NULL; - } -#endif - cinfo->count++; - if (reclen > cinfo->nbytes) - return de; - (void *)memcpy(cinfo->data, de, reclen); - cinfo->data = (char *)cinfo->data + reclen; - cinfo->nbytes -= reclen; - return NULL; -} - -static ssize_t -_sysio_incore_dirop_filldirentries(struct inode *ino, - _SYSIO_OFF_T *posp, - char *buf, - size_t nbytes) -{ - struct incore_inode *icino = I2IC(ino); - off_t off; - struct intnl_dirent *de; - struct copy_info copy_info; - - if (*posp >= icino->ici_st.st_size) - return 0; - - de = - incore_directory_probe(icino->ici_data, - icino->ici_st.st_size, - *posp, - (probe_ty )incore_directory_position, - NULL, - (char *)icino->ici_data + *posp); - if (!de) { - /* - * Past EOF. 
- */ - return 0; - } - - copy_info.data = buf; - copy_info.nbytes = nbytes; - copy_info.count = 0; - off = (char *)de - (char *)icino->ici_data; - de = - incore_directory_probe(de, - icino->ici_st.st_size - off, - off, - (probe_ty )incore_directory_enumerate, - NULL, - ©_info); - icino->ici_st.st_atime = time(NULL); - if (nbytes == copy_info.nbytes && copy_info.count) - return -EINVAL; - nbytes -= copy_info.nbytes; -#if 0 - if (!nbytes) - return -EOVERFLOW; -#endif - *posp += nbytes; - return (ssize_t )nbytes; -} - -static struct intnl_dirent * -incore_directory_best_fit(void *data, size_t len, struct lookup_data *ld) -{ - - if (!ld->hole.len || len < ld->hole.len) { - ld->hole.p = data; - ld->hole.len = len; - } - - return NULL; -} - -static int -incore_directory_insert(struct incore_inode *parent, - struct qstr *name, - ino_t inum, - unsigned char type) -{ - size_t reclen; - struct lookup_data lookup_data; - struct intnl_dirent *de; - size_t xt; - size_t n; - size_t r; - - reclen = INCORE_D_RECLEN(name->len); - INCORE_LD_INIT(&lookup_data, reclen, name); - de = - incore_directory_probe(parent->ici_data, - parent->ici_st.st_size, - 0, - (probe_ty )incore_directory_match, - (probe_ty )incore_directory_best_fit, - &lookup_data); - if (de) - return -EEXIST; - de = lookup_data.de; - xt = (char *)lookup_data.de - (char *)parent->ici_data; - n = -#ifdef _DIRENT_HAVE_D_OFF - de->d_off; -#else - xt + de->d_reclen; -#endif - r = -#ifdef _DIRENT_HAVE_D_OFF - de->d_reclen; -#else - INCORE_D_RECLEN(de->d_namlen); -#endif - if (!parent->ici_st.st_size || - xt + r + reclen > (size_t )parent->ici_st.st_size) { - int err; - - err = incore_trunc(parent, xt + r + reclen, 1); - if (err) - return err; - de = (struct intnl_dirent *)((char *)parent->ici_data + xt); - n = parent->ici_st.st_size; - } - -#ifdef _DIRENT_HAVE_D_OFF - de->d_off = xt + r; /* trim */ -#else - de->d_reclen = r; -#endif - de = (struct intnl_dirent *)((char *)de + r); /* reposition */ - xt += r; - -#ifndef 
_DIRENT_HAVE_D_OFF - /* - * Will we split this hole or use all of it? - */ - if (lookup_data.hole.len - reclen && - lookup_data.hole.len - reclen <= INCORE_D_RECLEN(1)) - reclen = lookup_data.hole.len; -#endif - - /* - * Insert new. - */ - de->d_ino = inum; -#ifdef _DIRENT_HAVE_D_OFF - de->d_off = n; -#endif - de->d_reclen = reclen; - de->d_type = type; - (void )memcpy(de->d_name, name->name, name->len); -#ifdef _DIRENT_HAVE_D_NAMLEN - de->d_namlen = name->len; -#endif - -#ifndef _DIRENT_HAVE_D_OFF - xt += reclen; - if (n - xt) { - /* - * White-out remaining part of the hole. - */ - (void *)de += reclen; - de->d_ino = 0; - de->d_reclen = n - xt; - de->d_type = DT_WHT; - de->d_namlen = 0; - } -#endif - - /* - * Update attributes to reflect the new entry. - */ - parent->ici_st.st_nlink++; - assert(parent->ici_st.st_nlink); - parent->ici_st.st_atime = parent->ici_st.st_mtime = time(NULL); - - return 0; -} - -static int -_sysio_incore_dirop_mkdir(struct pnode *pno, mode_t mode) -{ - struct intnl_stat stat; - struct incore_inode *icino, *parent; - ino_t inum; - int err; - struct intnl_dirent *de = NULL; - struct inode *ino; - - ino = pno->p_parent->p_base->pb_ino; - parent = I2IC(ino); - - if (!S_ISDIR(parent->ici_st.st_mode)) - return -ENOTDIR; - - (void )memset(&stat, 0, sizeof(stat)); - stat.st_dev = pno->p_parent->p_base->pb_ino->i_fs->fs_dev; - inum = incore_inum_alloc(); -#ifdef HAVE__ST_INO - stat.__st_ino = inum; -#endif - stat.st_mode = S_IFDIR | (mode & 07777); - stat.st_nlink = 2; - stat.st_uid = getuid(); - stat.st_gid = getgid(); - stat.st_size = 0; - stat.st_blksize = 4096; - stat.st_blocks = 0; - stat.st_ctime = stat.st_mtime = stat.st_atime = 0; - stat.st_ino = inum; - icino = incore_directory_new(FS2ICFS(ino->i_fs), parent, &stat); - if (!icino) - return -ENOSPC; - - /* - * Tell the system about the new inode. - * - * Persistent across remounts because we ask for immunity. 
- */ - ino = - _sysio_i_new(pno->p_parent->p_base->pb_ino->i_fs, - &icino->ici_fileid, - &stat, - 1, - &_sysio_incore_dir_ops, - icino); - if (!ino) { - incore_i_destroy(icino); - return -ENOMEM; - } - - /* - * Insert into parent. - */ - err = - incore_directory_insert(parent, - &pno->p_base->pb_name, - stat.st_ino, - INCORE_D_TYPEOF(S_IFDIR)); - - if (err) { - de->d_ino = 0; /* bad parent */ - I_RELE(ino); - _sysio_i_gone(ino); - return err; - } - - pno->p_base->pb_ino = ino; - return 0; -} - -static int -incore_unlink_entry(struct incore_inode *icino, - struct qstr *name) -{ - struct lookup_data lookup_data; - struct intnl_dirent *de; - size_t reclen; -#ifdef _DIRENT_HAVE_D_OFF - size_t off; -#endif - - if (!S_ISDIR(icino->ici_st.st_mode)) - return -ENOTDIR; - - INCORE_LD_INIT(&lookup_data, 0, name); - de = - incore_directory_probe(icino->ici_data, - icino->ici_st.st_size, - 0, - (probe_ty )incore_directory_match, - NULL, - &lookup_data); - if (!de) - return -ENOENT; - assert((size_t )((char *)de - (char *)icino->ici_data) >= - sizeof(incore_dir_template)); -#ifndef _DIRENT_HAVE_D_OFF - reclen = de->d_reclen; -#else - off = de->d_off; - reclen = off - ((char *)de - (char *)icino->ici_data); -#endif - (void )memset(de, 0, reclen); -#ifndef _DIRENT_HAVE_D_OFF - de->d_type = (__uint8_t )DTTOIF(DT_WHT); - de->d_reclen = reclen; -#else - lookup_data.de->d_off = off; -#endif - - /* - * Adjust link count. - */ - assert(icino->ici_st.st_nlink > 2); - icino->ici_st.st_nlink--; - - return 0; -} - -static int -_sysio_incore_dirop_rmdir(struct pnode *pno) -{ - struct inode *ino = pno->p_base->pb_ino; - struct incore_inode *icino = I2IC(ino); - int err; - - if (!pno->p_base->pb_name.len || - (pno->p_base->pb_name.name[0] == '.' 
&& - (pno->p_base->pb_name.len == 1 || - (pno->p_base->pb_name.len == 2 && - pno->p_base->pb_name.name[1] == '.')))) - return -EINVAL; - - if (!S_ISDIR(icino->ici_st.st_mode)) - return -ENOTDIR; - - if (icino->ici_st.st_nlink > 2) - return -ENOTEMPTY; - - pno->p_base->pb_ino = NULL; - err = - incore_unlink_entry(I2IC(pno->p_parent->p_base->pb_ino), - &pno->p_base->pb_name); - return err; -} - -static int -incore_create(struct pnode *pno, struct intnl_stat *stat) -{ - struct inode *dino, *ino; - struct incore_inode *icino; - int err; - - dino = pno->p_parent->p_base->pb_ino; - assert(dino); - - icino = incore_i_alloc(FS2ICFS(dino->i_fs), stat); - if (!icino) - return -ENOSPC; - - /* - * Tell the system about the new inode. - */ - ino = - _sysio_i_new(dino->i_fs, - &icino->ici_fileid, - stat, - 1, - S_ISREG(stat->st_mode) - ? &_sysio_incore_file_ops - : &_sysio_incore_dev_ops, - icino); - if (!ino) { - incore_i_destroy(icino); - return -ENOMEM; - } - - /* - * Insert into parent. - */ - err = - incore_directory_insert(I2IC(dino), - &pno->p_base->pb_name, - stat->st_ino, - INCORE_D_TYPEOF(icino->ici_st.st_mode)); - if (err) { - I_RELE(ino); - _sysio_i_gone(ino); - return err; - } - - pno->p_base->pb_ino = ino; - return 0; -} - -static int -_sysio_incore_inop_open(struct pnode *pno, int flags __IS_UNUSED, mode_t mode) -{ - struct intnl_stat stat; - ino_t inum; - - /* - * File exists. Nothing to do. - */ - if (pno->p_base->pb_ino) - return 0; - - /* - * Must create a new, regular, file. 
- */ - (void )memset(&stat, 0, sizeof(stat)); - stat.st_dev = pno->p_parent->p_base->pb_ino->i_fs->fs_dev; - inum = incore_inum_alloc(); -#ifdef HAVE__ST_INO - stat.__st_ino = inum; -#endif - stat.st_mode = S_IFREG | (mode & 07777); - stat.st_nlink = 1; - stat.st_uid = getuid(); - stat.st_gid = getgid(); - stat.st_rdev = 0; - stat.st_size = 0; - stat.st_blksize = 4096; - stat.st_blocks = 0; - stat.st_ctime = stat.st_mtime = stat.st_atime = 0; - stat.st_ino = inum; - - return incore_create(pno, &stat); -} - -static int -_sysio_incore_inop_close(struct inode *ino __IS_UNUSED) -{ - - return 0; -} - -static int -_sysio_incore_dirop_link(struct pnode *old, struct pnode *new) -{ - struct incore_inode *icino = I2IC(old->p_base->pb_ino); - int err; - - assert(!new->p_base->pb_ino); - assert(!S_ISDIR(old->p_base->pb_ino->i_stbuf.st_mode)); - - /* - * Can bump the link count? - */ - if (!(icino->ici_st.st_nlink + 1)) - return -EMLINK; - /* - * Insert into parent. - */ - err = - incore_directory_insert(I2IC(new->p_parent->p_base->pb_ino), - &new->p_base->pb_name, - icino->ici_st.st_ino, - INCORE_D_TYPEOF(icino->ici_st.st_mode)); - if (err) - return err; - /* - * Bump the link count. - */ - icino->ici_st.st_nlink++; - - return 0; -} - -static int -_sysio_incore_dirop_rename(struct pnode *old, struct pnode *new) -{ - int err; - struct incore_inode *icino = I2IC(old->p_base->pb_ino); - - if (new->p_base->pb_ino) { - /* - * Have to kill off the target first. - */ - if (S_ISDIR(I2IC(new->p_base->pb_ino)->ici_st.st_mode) && - I2IC(new->p_base->pb_ino)->ici_st.st_nlink > 2) - return -ENOTEMPTY; - err = - incore_unlink_entry(I2IC(new->p_parent->p_base->pb_ino), - &new->p_base->pb_name); - if (err) - return err; - } - - /* - * Insert into new parent. - */ - err = - incore_directory_insert(I2IC(new->p_parent->p_base->pb_ino), - &new->p_base->pb_name, - icino->ici_st.st_ino, - INCORE_D_TYPEOF(icino->ici_st.st_mode)); - if (err) - abort(); - /* - * Remove from the old parent. 
- */ - err = - incore_unlink_entry(I2IC(old->p_parent->p_base->pb_ino), - &old->p_base->pb_name); - if (err) - abort(); - - if (S_ISDIR(icino->ici_st.st_mode)) { - struct intnl_dirent *de; - - /* - * We moved a directory. The entry for `..' must be corrected. - */ - de = icino->ici_data; - de++; - assert(strcmp(de->d_name, "..") == 0); - de->d_ino = I2IC(new->p_parent->p_base->pb_ino)->ici_st.st_ino; - } - return 0; -} - -static int -_sysio_incore_dirop_unlink(struct pnode *pno) -{ - struct inode *ino = pno->p_base->pb_ino; - struct incore_inode *icino = I2IC(ino); - int err; - - if (S_ISDIR(icino->ici_st.st_mode)) - return -EISDIR; - - err = - incore_unlink_entry(I2IC(pno->p_parent->p_base->pb_ino), - &pno->p_base->pb_name); - return err; -} - -static int -doio(ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, struct incore_inode *), - struct inode *ino, - struct ioctx *ioctx) -{ - - ioctx->ioctx_cc = - _sysio_doio(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen, - ioctx->ioctx_iov, ioctx->ioctx_iovlen, - (ssize_t (*)(void *, size_t, _SYSIO_OFF_T, void *))f, - I2IC(ino)); - if (ioctx->ioctx_cc < 0) { - ioctx->ioctx_errno = -ioctx->ioctx_cc; - ioctx->ioctx_cc = -1; - } - ioctx->ioctx_done = 1; - - return 0; -} - -static ssize_t -incore_read(void *buf, size_t nbytes, - _SYSIO_OFF_T off, - struct incore_inode *icino) -{ - size_t n; - - if (off < 0) - return -EINVAL; - if (!nbytes || off > icino->ici_st.st_size) - return 0; - n = icino->ici_st.st_size - (size_t )off; - if (n > nbytes) - n = nbytes; - (void )memcpy(buf, (char *)icino->ici_data + off, (size_t )n); - - return (ssize_t )n; -} - -static int -_sysio_incore_filop_read(struct inode *ino, struct ioctx *ioctx) -{ - - - return doio(incore_read, ino, ioctx); -} - -static ssize_t -incore_write(const void *buf, size_t nbytes, - _SYSIO_OFF_T off, - struct incore_inode *icino) -{ - _SYSIO_OFF_T pos; - - if (off < 0) - return -EINVAL; - if (!nbytes || off > icino->ici_st.st_size) - return 0; - pos = off + nbytes; - if (off && pos <= 
off) { - /* - * It's all or nothing. We won't write just part of - * the buffer. - */ - return -EFBIG; - } - if (pos > icino->ici_st.st_size) { - int err; - - err = incore_trunc(icino, (size_t )pos, 0); - if (err) - return err; - } - (void )memcpy((char *)icino->ici_data + off, buf, nbytes); - - return (ssize_t )nbytes; -} - -static int -_sysio_incore_filop_write(struct inode *ino, struct ioctx *ioctx) -{ - - return doio((ssize_t (*)(void *, size_t, - _SYSIO_OFF_T, - struct incore_inode *))incore_write, - ino, - ioctx); -} - -static _SYSIO_OFF_T -_sysio_incore_filop_pos(struct inode *ino __IS_UNUSED, _SYSIO_OFF_T off) -{ - - return off; -} - -static int -_sysio_incore_filop_iodone(struct ioctx *iocp __IS_UNUSED) -{ - - /* - * It's always done in this driver. It completed when posted. - */ - return 1; -} - -static int -_sysio_incore_filop_fcntl(struct inode *ino __IS_UNUSED, - int cmd __IS_UNUSED, - va_list ap __IS_UNUSED, - int *rtn) -{ - - /* - * No fcntl's supported. - */ - *rtn = -1; - return -ENOTTY; -} - -static int -_sysio_incore_inop_sync(struct inode *ino __IS_UNUSED) -{ - - /* - * With what? - */ - return 0; -} - -static int -_sysio_incore_filop_ioctl(struct inode *ino __IS_UNUSED, - unsigned long int request __IS_UNUSED, - va_list ap __IS_UNUSED) -{ - - /* - * No ioctl's supported. - */ - return -ENOTTY; -} - -static int -_sysio_incore_dirop_mknod(struct pnode *pno, mode_t mode, dev_t dev) -{ - mode_t m; - struct intnl_stat stat; - ino_t inum; - - assert(!pno->p_base->pb_ino); - - m = mode & S_IFMT; - if (S_ISCHR(m)) - m &= ~S_IFCHR; - else if (S_ISFIFO(m)) - m &= ~S_IFIFO; - else if (S_ISBLK(m)) - m &= ~S_IFCHR; - else - return -EINVAL; - if (m) - return -EINVAL; - - /* - * Initialize attributes. 
- */ - (void )memset(&stat, 0, sizeof(stat)); - stat.st_dev = pno->p_parent->p_base->pb_ino->i_fs->fs_dev; - inum = incore_inum_alloc(); -#ifdef HAVE__ST_INO - stat.__st_ino = inum; -#endif - stat.st_mode = mode; - stat.st_nlink = 1; - stat.st_uid = getuid(); - stat.st_gid = getgid(); - stat.st_rdev = dev; - stat.st_size = 0; - stat.st_blksize = 4096; - stat.st_blocks = 0; - stat.st_ctime = stat.st_mtime = stat.st_atime = 0; - stat.st_ino = inum; - - return incore_create(pno, &stat); -} - -#ifdef _HAVE_STATVFS -static int -_sysio_incore_inop_statvfs(struct pnode *pno, - struct inode *ino, - struct intnl_statvfs *buf) -{ - struct filesys *fs; - - if (!ino) - ino = pno->p_base->pb_ino; - assert(ino); - - fs = pno->p_base->pb_ino->i_fs; - - (void )memset(buf, 0, sizeof(struct intnl_statvfs)); - - /* - * Mostly, we lie. - */ - buf->f_bsize = fs->fs_bsize; - buf->f_frsize = buf->f_bsize; - buf->f_blocks = ~0; - buf->f_blocks /= buf->f_bsize; - buf->f_bfree = buf->f_blocks - 1; - buf->f_bavail = buf->f_bfree; - buf->f_files = buf->f_blocks; - buf->f_ffree = buf->f_files - 1; - buf->f_favail = buf->f_ffree; - buf->f_fsid = fs->fs_id; - buf->f_flag = 0; - buf->f_namemax = ULONG_MAX; - - return 0; -} -#endif - -void -_sysio_incore_inop_gone(struct inode *ino) -{ - struct incore_inode *icino = I2IC(ino); - - incore_i_destroy(icino); -} diff --git a/libsysio/drivers/incore/fs_incore.h b/libsysio/drivers/incore/fs_incore.h deleted file mode 100644 index 84fa631cdebc79afdc3b4d1282b090f4f4c4ae8b..0000000000000000000000000000000000000000 --- a/libsysio/drivers/incore/fs_incore.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Native file system driver support. 
- */ - -extern int _sysio_incore_init(void); diff --git a/libsysio/drivers/incore/module.mk b/libsysio/drivers/incore/module.mk deleted file mode 100644 index 140d69b80005d65ea280b9e76fc2144a60f59e87..0000000000000000000000000000000000000000 --- a/libsysio/drivers/incore/module.mk +++ /dev/null @@ -1,2 +0,0 @@ -INCORE_SRCS = drivers/incore/fs_incore.c -INCORE_EXTRA = drivers/incore/fs_incore.h drivers/incore/module.mk diff --git a/libsysio/drivers/native/.cvsignore b/libsysio/drivers/native/.cvsignore deleted file mode 100644 index ec96903b9d05c45b7fb9e6f057c456661be09b81..0000000000000000000000000000000000000000 --- a/libsysio/drivers/native/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.deps -.dirstamp diff --git a/libsysio/drivers/native/fs_native.c b/libsysio/drivers/native/fs_native.c deleted file mode 100644 index 1909e2e72c50c447a787b40398d53472998ffc79..0000000000000000000000000000000000000000 --- a/libsysio/drivers/native/fs_native.c +++ /dev/null @@ -1,1813 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifdef __linux__ -#define _BSD_SOURCE -#endif - -#include <stdio.h> /* for NULL */ -#include <stdlib.h> -#ifdef __linux__ -#include <string.h> -#endif -#include <unistd.h> -#if !(defined(REDSTORM) || defined(MAX_IOVEC)) -#include <limits.h> -#endif -#include <errno.h> -#include <assert.h> -#include <syscall.h> -#include <sys/time.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/fcntl.h> -#if 0 -#include <sys/vfs.h> -#endif -#ifdef _HAVE_STATVFS -#include <sys/statvfs.h> -#include <sys/statfs.h> -#endif -#include <utime.h> -#include <sys/uio.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" -#include "native.h" -#include "fs.h" -#include "mount.h" -#include "inode.h" - -#include "fs_native.h" - -#ifdef REDSTORM -#include <sys/uio.h> -#endif - -#if defined(SYSIO_SYS_getdirentries) -#define DIR_CVT_64 0 -#elif defined(SYSIO_SYS_getdents64) -#define DIR_CVT_64 0 -#elif defined(SYSIO_SYS_getdents) -#if defined(_LARGEFILE64_SOURCE) -#define DIR_CVT_64 1 -/* - * Kernel version of directory entry. 
- */ -struct linux_dirent { - unsigned long ld_ino; - unsigned long ld_off; - unsigned short ld_reclen; - char ld_name[1]; -}; -#include <dirent.h> -#else /* !defined(_LARGEFILE64_SOURCE) */ -#define DIR_CVT_64 0 -#endif /* defined(_LARGEFILE64_SOURCE) */ -#else /* catch-none */ -#error No usable directory fill entries interface available -#endif - -/* - * Native file system information we keep per FS. - */ -struct native_filesystem { - time_t nfs_atimo; /* attr timeout (sec) */ -}; - -/* - * Given fs, return driver private part. - */ -#define FS2NFS(fs) \ - ((struct native_filesystem *)(fs)->fs_private) - -/* - * Native file identifiers format. - */ -struct native_inode_identifier { - dev_t dev; /* device number */ - ino_t ino; /* i-number */ -#ifdef HAVE_GENERATION - unsigned int gen; /* generation number */ -#endif -}; - -/* - * Driver-private i-node information we keep about local host file - * system objects. - */ -struct native_inode { - unsigned - ni_seekok : 1, /* can seek? */ - ni_attrvalid : 1, /* cached attrs ok? */ - ni_resetfpos : 1; /* reset fpos? */ - struct native_inode_identifier ni_ident; /* unique identifier */ - struct file_identifier ni_fileid; /* ditto */ - int ni_fd; /* host fildes */ - int ni_oflags; /* flags, from open */ - unsigned ni_nopens; /* soft ref count */ - _SYSIO_OFF_T ni_fpos; /* current pos */ - time_t ni_attrtim; /* attrs expire time */ -}; - -/* - * Cached attributes usable? - */ -#define NATIVE_ATTRS_VALID(nino, t) \ - ((nino)->ni_attrtim && (t) < (nino)->ni_attrtim) - -/* - * Native IO path arguments. 
- */ -struct native_io { - char nio_op; /* 'r' or 'w' */ - struct native_inode *nio_nino; /* native ino */ -}; - -static int native_inop_lookup(struct pnode *pno, - struct inode **inop, - struct intent *intnt, - const char *path); -static int native_inop_getattr(struct pnode *pno, - struct inode *ino, - struct intnl_stat *stbuf); -static int native_inop_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf); -static ssize_t native_filldirentries(struct inode *ino, - _SYSIO_OFF_T *posp, - char *buf, - size_t nbytes); -static int native_inop_mkdir(struct pnode *pno, mode_t mode); -static int native_inop_rmdir(struct pnode *pno); -static int native_inop_symlink(struct pnode *pno, const char *data); -static int native_inop_readlink(struct pnode *pno, char *buf, size_t bufsiz); -static int native_inop_open(struct pnode *pno, int flags, mode_t mode); -static int native_inop_close(struct inode *ino); -static int native_inop_link(struct pnode *old, struct pnode *new); -static int native_inop_unlink(struct pnode *pno); -static int native_inop_rename(struct pnode *old, struct pnode *new); -static int native_inop_read(struct inode *ino, struct ioctx *ioctx); -static int native_inop_write(struct inode *ino, struct ioctx *ioctx); -static _SYSIO_OFF_T native_inop_pos(struct inode *ino, _SYSIO_OFF_T off); -static int native_inop_iodone(struct ioctx *ioctx); -static int native_inop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn); -static int native_inop_sync(struct inode *ino); -static int native_inop_datasync(struct inode *ino); -static int native_inop_ioctl(struct inode *ino, - unsigned long int request, - va_list ap); -static int native_inop_mknod(struct pnode *pno, mode_t mode, dev_t dev); -#ifdef _HAVE_STATVFS -static int native_inop_statvfs(struct pnode *pno, - struct inode *ino, - struct intnl_statvfs *buf); -#endif -static void native_inop_gone(struct inode *ino); - -static struct inode_ops native_i_ops = { - 
native_inop_lookup, - native_inop_getattr, - native_inop_setattr, - native_filldirentries, - native_inop_mkdir, - native_inop_rmdir, - native_inop_symlink, - native_inop_readlink, - native_inop_open, - native_inop_close, - native_inop_link, - native_inop_unlink, - native_inop_rename, - native_inop_read, - native_inop_write, - native_inop_pos, - native_inop_iodone, - native_inop_fcntl, - native_inop_sync, - native_inop_datasync, - native_inop_ioctl, - native_inop_mknod, -#ifdef _HAVE_STATVFS - native_inop_statvfs, -#endif - native_inop_gone -}; - -static int native_fsswop_mount(const char *source, - unsigned flags, - const void *data, - struct pnode *tocover, - struct mount **mntp); - -static struct fssw_ops native_fssw_ops = { - native_fsswop_mount -}; - -static void native_fsop_gone(struct filesys *fs); - -static struct filesys_ops native_inodesys_ops = { - native_fsop_gone, -}; - -/* - * This example driver plays a strange game. It maintains a private, - * internal mount -- It's own separate, rooted, name space. The local - * file system's entire name space is available via this tree. - * - * This simplifies the implementation. At mount time, we need to generate - * a path-node to be used as a root. This allows us to look up the needed - * node in the host name space and leverage a whole lot of support from - * the system. - */ -static struct mount *native_internal_mount = NULL; - -/* - * Given i-node, return driver private part. - */ -#define I2NI(ino) ((struct native_inode *)((ino)->i_private)) - -/* - * stat -- by path. - */ -static int -native_stat(const char *path, - struct inode *ino, - time_t t, - struct intnl_stat *buf) -{ - struct native_inode *nino; - int err; - struct _sysio_native_stat stbuf; - - nino = ino ? 
I2NI(ino) : NULL; - - if (path) - err = syscall(SYSIO_SYS_stat, path, &stbuf); - else if (nino && nino->ni_fd >= 0) - err = syscall(SYSIO_SYS_fstat, nino->ni_fd, &stbuf); - else - abort(); - if (err) { - if (nino) - nino->ni_attrtim = 0; - return -errno; - } - if (nino) { - nino->ni_attrtim = t; - SYSIO_COPY_STAT(&stbuf, &ino->i_stbuf); - if (buf) - *buf = ino->i_stbuf; - return 0; - } - if (!buf) - return 0; - SYSIO_COPY_STAT(&stbuf, buf); - return 0; -} - -/* - * Introduce an i-node to the system. - */ -static struct inode * -native_i_new(struct filesys *fs, time_t expiration, struct intnl_stat *buf) -{ - struct native_inode *nino; - struct inode *ino; - - nino = malloc(sizeof(struct native_inode)); - if (!nino) - return NULL; - bzero(&nino->ni_ident, sizeof(nino->ni_ident)); - nino->ni_seekok = 0; - nino->ni_attrvalid = 0; - nino->ni_resetfpos = 0; - nino->ni_ident.dev = buf->st_dev; - nino->ni_ident.ino = buf->st_ino; -#ifdef HAVE_GENERATION - nino->ni_ident.gen = buf->st_gen; -#endif - nino->ni_fileid.fid_data = &nino->ni_ident; - nino->ni_fileid.fid_len = sizeof(nino->ni_ident); - nino->ni_fd = -1; - nino->ni_oflags = 0; - nino->ni_nopens = 0; - nino->ni_fpos = 0; - nino->ni_attrtim = expiration; - ino = - _sysio_i_new(fs, - &nino->ni_fileid, - buf, - 0, - &native_i_ops, - nino); - if (!ino) - free(nino); - return ino; -} - -/* - * Initialize this driver. - */ -int -_sysio_native_init() -{ - - /* - * Capture current process umask and reset our process umask to - * zero. All permission bits to open/creat/setattr are absolute -- - * They've already had a umask applied, when appropriate. - */ -#ifndef REDSTORM - _sysio_umask = syscall(SYSIO_SYS_umask, 0); - /* - * For Red Storm, this functionality is handled in cstart. - * The mask to be "captured" has been sent already. - * This eliminates a system call from every node! 
- */ -#endif /* REDSTORM */ - - return _sysio_fssw_register("native", &native_fssw_ops); -} - -/* - * Create private, internal, view of the hosts name space. - */ -static int -create_internal_namespace(const void *data) -{ - char *opts; - ssize_t len; - char *cp; - struct native_filesystem *nfs; - int err; - struct mount *mnt; - struct inode *rootino; - struct pnode_base *rootpb; - static struct qstr noname = { NULL, 0, 0 }; - struct filesys *fs; - time_t t; - struct intnl_stat stbuf; - unsigned long ul; - static struct option_value_info v[] = { - { "atimo", "30" }, - { NULL, NULL } - }; - - if (native_internal_mount) { - /* - * Reentered! - */ - abort(); - } - - /* - * Get mount options. - */ - opts = NULL; - if (data && (len = strlen((char *)data))) { - opts = malloc(len + 1); - if (!opts) - return -ENOMEM; - (void )strcpy(opts, data); - if (_sysio_get_args(opts, v) - opts != (ssize_t )len) - return -EINVAL; - } - ul = strtoul(v[0].ovi_value, &cp, 0); - if (*cp != '\0' || ul >= UINT_MAX) - return -EINVAL; - if (opts) { - free(opts); - opts = NULL; - } - - /* - * We maintain an artificial, internal, name space in order to - * have access to fully qualified path names in the various routines. - * Initialize that name space now. - */ - fs = NULL; - mnt = NULL; - rootino = NULL; - rootpb = NULL; - /* - * This really should be per-mount. Hmm, but that's best done - * as proper sub-mounts in the core and not this driver. We reconcile - * now, here, by putting the mount options on the file system. That - * means they are global and only can be passed at the initial mount. - * - * Maybe do it right some day? - */ - nfs = malloc(sizeof(struct native_filesystem)); - if (!nfs) { - err = -ENOMEM; - goto error; - } - nfs->nfs_atimo = ul; - if ((unsigned long)nfs->nfs_atimo != ul) { - err = -EINVAL; - goto error; - } - fs = _sysio_fs_new(&native_inodesys_ops, 0, nfs); - if (!fs) { - err = -ENOMEM; - goto error; - } - - /* - * Get root i-node. 
- */ - t = _SYSIO_LOCAL_TIME(); - err = native_stat("/", NULL, 0, &stbuf); - if (err) - goto error; - rootino = native_i_new(fs, t + FS2NFS(fs)->nfs_atimo, &stbuf); - if (!rootino) { - err = -ENOMEM; - goto error; - } - - /* - * Generate base path-node for root. - */ - rootpb = _sysio_pb_new(&noname, NULL, rootino); - if (!rootpb) { - err = -ENOMEM; - goto error; - } - - /* - * Mount it. This name space is disconnected from the - * rest of the system -- Only available within this driver. - */ - err = _sysio_do_mount(fs, rootpb, 0, NULL, &mnt); - if (err) - goto error; - - native_internal_mount = mnt; - return 0; -error: - if (mnt) { - if (_sysio_do_unmount(mnt) != 0) - abort(); - nfs = NULL; - fs = NULL; - rootpb = NULL; - rootino = NULL; - } - if (rootpb) - _sysio_pb_gone(rootpb); - if (fs) { - FS_RELE(fs); - nfs = NULL; - } - if (nfs) - free(nfs); - if (opts) - free(opts); - - return err; -} - -static int -native_fsswop_mount(const char *source, - unsigned flags, - const void *data, - struct pnode *tocover, - struct mount **mntp) -{ - int err; - struct nameidata nameidata; - struct mount *mnt; - - /* - * Caller must use fully qualified path names when specifying - * the source. - */ - if (*source != '/') - return -ENOENT; - - if (!native_internal_mount) { - err = create_internal_namespace(data); - if (err) - return err; - } else if (data && *(char *)data) - return -EINVAL; - - /* - * Lookup the source in the internally maintained name space. - */ - ND_INIT(&nameidata, 0, source, native_internal_mount->mnt_root, NULL); - err = _sysio_path_walk(native_internal_mount->mnt_root, &nameidata); - if (err) - return err; - - /* - * Have path-node specified by the given source argument. Let the - * system finish the job, now. - */ - err = - _sysio_do_mount(native_internal_mount->mnt_fs, - nameidata.nd_pno->p_base, - flags, - tocover, - &mnt); - /* - * Release the internal name space pnode and clean up any - * aliases we might have generated. 
We really don't need to cache them - * as they are only used at mount time.. - */ - P_RELE(nameidata.nd_pno); - (void )_sysio_p_prune(native_internal_mount->mnt_root); - - if (!err) { - FS_REF(native_internal_mount->mnt_fs); - *mntp = mnt; - } - return err; -} - -static int -native_i_invalid(struct inode *inop, struct intnl_stat *stat) -{ - struct native_inode *nino; - - /* - * Validate passed in inode against stat struct info - */ - nino = I2NI(inop); - - if (!nino->ni_attrtim || - (nino->ni_ident.dev != stat->st_dev || - nino->ni_ident.ino != stat->st_ino || -#ifdef HAVE_GENERATION - nino->ni_ident.gen != stat->st_gen || -#endif - ((inop)->i_stbuf.st_mode & S_IFMT) != (stat->st_mode & S_IFMT)) || - (((inop)->i_stbuf.st_rdev != stat->st_rdev) && - (S_ISCHR((inop)->i_stbuf.st_mode) || - S_ISBLK((inop)->i_stbuf.st_mode)))) { - nino->ni_attrtim = 0; /* invalidate attrs */ - return 1; - } - return 0; -} - -static struct inode * -native_iget(struct filesys *fs, time_t expire, struct intnl_stat *stbp) -{ - struct inode *ino; - struct native_inode_identifier ident; - struct file_identifier fileid; - - bzero(&ident, sizeof(ident)); - ident.dev = stbp->st_dev; - ident.ino = stbp->st_ino; -#ifdef HAVE_GENERATION - ident.gen = stbp->st_gen; -#endif - fileid.fid_data = &ident; - fileid.fid_len = sizeof(ident); - ino = _sysio_i_find(fs, &fileid); - if (ino) { - ino->i_stbuf = *stbp; - I2NI(ino)->ni_attrtim = expire; - return ino; - } - return native_i_new(fs, expire, stbp); -} - -/* - * Find, and validate, or create i-node by host-relative path. Returned i-node - * is referenced. - */ -static int -native_ibind(struct filesys *fs, - char *path, - time_t t, - struct inode **inop) -{ - struct intnl_stat ostbuf, stbuf; - int err; - struct inode *ino; - - if (*inop) - ostbuf = (*inop)->i_stbuf; - - err = native_stat(path, *inop, t, &stbuf); - if (err) - return err; - - /* - * Validate? - */ - if (*inop) { - if (!native_i_invalid(*inop, &ostbuf)) - return 0; - /* - * Invalidate. 
- */ - _sysio_i_undead(*inop); - *inop = NULL; - } - - if (!(ino = native_iget(fs, t + FS2NFS(fs)->nfs_atimo, &stbuf))) - return -ENOMEM; - - *inop = ino; - return 0; -} - -static int -native_inop_lookup(struct pnode *pno, - struct inode **inop, - struct intent *intnt __IS_UNUSED, - const char *path __IS_UNUSED) -{ - time_t t; - char *fqpath; - struct filesys *fs; - int err; - - *inop = pno->p_base->pb_ino; - - /* - * Try to use the cached attributes unless the intent - * indicates we are looking up the last component and - * caller wants attributes. In that case, force a refresh. - */ - t = _SYSIO_LOCAL_TIME(); - if (*inop && - (path || !intnt || (intnt->int_opmask & INT_GETATTR) == 0) && - NATIVE_ATTRS_VALID(I2NI(*inop), t)) - return 0; - - /* - * Don't have an inode yet. Because we translate everything back to - * a single name space for the host, we will assume the object the - * caller is looking for has no existing alias in our internal - * name space. We don't see the same file on different mounts in the - * underlying host FS as the same file. - * - * The file identifier *will* be unique. It's got to have a different - * dev. - */ - fqpath = _sysio_pb_path(pno->p_base, '/'); - if (!fqpath) - return -ENOMEM; - fs = pno->p_mount->mnt_fs; - err = native_ibind(fs, fqpath, t + FS2NFS(fs)->nfs_atimo, inop); - free(fqpath); - if (err) - *inop = NULL; - return err; -} - -static int -native_inop_getattr(struct pnode *pno, - struct inode *ino, - struct intnl_stat *stat) -{ - struct native_inode *nino; - int err; - - /* - * We just cannot use the cached attributes when getattr is - * called. Had the caller felt those were sufficient then - * they could have (would have?) simply used what was cached - * after revalidating. In this case, there's a good chance the - * caller is looking for the current time stamps and/or size. Something - * pretty volatile anyway. 
- */ - err = 0; /* compiler cookie */ - if (pno) { - char *path; - struct filesys *fs; - time_t t; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - fs = pno->p_mount->mnt_fs; - t = _SYSIO_LOCAL_TIME(); - err = native_stat(path, ino, t + FS2NFS(fs)->nfs_atimo, stat); - free(path); - } else if ((nino = I2NI(ino))->ni_fd >= 0) - /* - * Don't have access to the fs record anymore. Just - * refresh but keep the current timeout. - */ - err = native_stat(NULL, ino, nino->ni_attrtim, stat); - else { - /* - * Dev inodes don't open in this driver. We won't have - * a file descriptor with which to do the deed then. Satisfy - * the request from the cached copy of the attributes. - */ - (void )memcpy(stat, - &ino->i_stbuf, - sizeof(struct intnl_stat)); - err = 0; - } - - return err; -} - -#ifdef SYSIO_SYS_utime -static int -_ut(const char *path, time_t actime, time_t modtime) -{ - struct utimbuf ut; - - ut.actime = actime; - ut.modtime = modtime; - return syscall(SYSIO_SYS_utime, path, &ut); -} -#else -static int -_ut(const char *path, time_t actime, time_t modtime) -{ - struct timeval tv[2]; - - tv[0].tv_sec = actime; - tv[0].tv_usec = 0; - tv[1].tv_sec = modtime; - tv[1].tv_usec = 0; - return syscall(SYSIO_SYS_utimes, path, &tv); -} -#endif - -static int -native_inop_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stat) -{ - char *path; - struct native_inode *nino; - int fd; - int err; - - path = NULL; - nino = ino ? I2NI(ino) : NULL; - fd = -1; - if (nino) - fd = nino->ni_fd; - if (fd < 0 || mask & (SETATTR_MTIME|SETATTR_ATIME)) { - if (!pno) - return -EEXIST; - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - } - - /* - * Get current status for undo. - */ - err = native_stat(path, ino, 0, NULL); - if (err) - goto out; - - if (mask & SETATTR_MODE) { - mode_t mode; - - /* - * Alter permissions attribute. - */ - mode = stat->st_mode & 07777; - err = - fd < 0 - ? 
syscall(SYSIO_SYS_chmod, path, mode) - : syscall(SYSIO_SYS_fchmod, fd, mode); - if (err) - err = -errno; - } - if (err) - mask &= ~SETATTR_MODE; - else if (mask & (SETATTR_MTIME|SETATTR_ATIME)) { - time_t actime, modtime; - - /* - * Alter access and/or modify time attributes. - */ - actime = ino->i_stbuf.st_atime; - modtime = ino->i_stbuf.st_mtime; - if (mask & SETATTR_ATIME) - actime = stat->st_atime; - if (mask & SETATTR_MTIME) - modtime = stat->st_mtime; - if (_ut(path, actime, modtime) != 0) - return -errno; - } - if (err) - mask &= ~(SETATTR_MTIME|SETATTR_ATIME); - else if (mask & (SETATTR_UID|SETATTR_GID)) { - - /* - * Alter owner and/or group identifiers. - */ - err = - fd < 0 - ? syscall(SYSIO_SYS_chown, - path, - mask & SETATTR_UID - ? stat->st_uid - : (uid_t )-1, - mask & SETATTR_GID - ? stat->st_gid - : (gid_t )-1) - : syscall(SYSIO_SYS_fchown, - fd, - mask & SETATTR_UID - ? stat->st_uid - : (uid_t )-1, - mask & SETATTR_GID - ? stat->st_gid - : (gid_t )-1); - if (err) - err = -errno; - } - if (err) - mask &= ~(SETATTR_UID|SETATTR_GID); - else if (mask & SETATTR_LEN) { - /* - * Do the truncate last. It can't be undone. - */ - err = fd < 0 - ? syscall(SYSIO_SYS_truncate, path, stat->st_size) - : syscall(SYSIO_SYS_ftruncate, fd, stat->st_size); - if (err) - err = -errno; - } - if (!err) - goto out; - /* - * Undo after error. Some or all of this might not work... We - * can but try. - */ - if (mask & (SETATTR_UID|SETATTR_GID)) { - (void )(fd < 0 - ? syscall(SYSIO_SYS_chown, - path, - mask & SETATTR_UID - ? ino->i_stbuf.st_uid - : (uid_t )-1, - mask & SETATTR_GID - ? ino->i_stbuf.st_gid - : (gid_t )-1) - : syscall(SYSIO_SYS_fchown, - fd, - mask & SETATTR_UID - ? ino->i_stbuf.st_uid - : (uid_t )-1, - mask & SETATTR_GID - ? ino->i_stbuf.st_gid - : (gid_t )-1)); - } - if (mask & (SETATTR_MTIME|SETATTR_ATIME)) - (void )_ut(path, ino->i_stbuf.st_atime, ino->i_stbuf.st_mtime); - if (mask & SETATTR_MODE) { - fd < 0 - ? 
syscall(SYSIO_SYS_chmod, path, ino->i_stbuf.st_mode & 07777) - : syscall(SYSIO_SYS_fchmod, ino->i_stbuf.st_mode & 07777); - } -out: - /* - * We must refresh the cached attributes. - */ - if (!err && native_stat(path, ino, _SYSIO_LOCAL_TIME(), NULL) != 0) - abort(); - if (path) - free(path); - return err; -} - -static int -native_pos(int fd, _SYSIO_OFF_T *offset, int whence) -{ - _SYSIO_OFF_T off; - - assert(fd >= 0); - assert(*offset >= 0); - - off = *offset; -#if defined(_LARGEFILE64_SOURCE) && defined(SYSIO_SYS__llseek) - { - int err; - err = - syscall(SYSIO_SYS__llseek, - (unsigned int)fd, - (unsigned int)(off >> 32), - (unsigned int)off, - &off, - whence); - if (err == -1) - return -errno; - } -#else - off = - syscall(SYSIO_SYS_lseek, - fd, - off, - whence); - if (off == -1) - return -errno; -#endif - *offset = off; - - return 0; -} - -static ssize_t -native_ifilldirentries(struct native_inode *nino, - _SYSIO_OFF_T *posp, - char *buf, - size_t nbytes) -{ - int err; - ssize_t cc; -#if defined(SYSIO_SYS_getdirentries) - _SYSIO_OFF_T waste; -#endif - - if (*posp < 0) - return -EINVAL; - - /* - * Stream-oriented access requires that we reposition prior to the - * fill call. - */ - assert(nino->ni_seekok); - if (*posp != nino->ni_fpos || nino->ni_resetfpos) { - nino->ni_fpos = *posp; - err = native_pos(nino->ni_fd, &nino->ni_fpos, SEEK_SET); - if (err) { - nino->ni_resetfpos = 1; - return err; - } - nino->ni_resetfpos = 0; - } - - cc = -#if defined(SYSIO_SYS_getdirentries) - syscall(SYSIO_SYS_getdirentries, - nino->ni_fd, - buf, - nbytes, - &waste); -#elif defined(SYSIO_SYS_getdents64) - syscall(SYSIO_SYS_getdents64, nino->ni_fd, buf, nbytes); -#elif defined(SYSIO_SYS_getdents) - syscall(SYSIO_SYS_getdents, nino->ni_fd, buf, nbytes); -#endif - - if (cc < 0) - return -errno; - /* - * Stream-oriented access requires that we discover where we are - * after the call. 
- */ - if ((err = native_pos(nino->ni_fd, &nino->ni_fpos, SEEK_CUR)) != 0) { - /* - * Leave the position at the old I suppose. - */ - nino->ni_resetfpos = 1; - return err; - } - *posp = nino->ni_fpos; - return cc; -} - -static ssize_t -native_filldirentries(struct inode *ino, - _SYSIO_OFF_T *posp, - char *buf, - size_t nbytes) -{ - struct native_inode *nino = I2NI(ino); -#if DIR_CVT_64 - char *bp; - size_t count; - struct linux_dirent *ldp; - struct dirent64 *d64p; - size_t namlen; - size_t reclen; -#else -#define bp buf -#define count nbytes -#endif - ssize_t cc; - - assert(nino->ni_fd >= 0); - -#if DIR_CVT_64 - count = nbytes; - while (!(bp = malloc(count))) { - count /= 2; - if (count < sizeof(struct dirent)) - return -ENOMEM; - } -#endif - cc = native_ifilldirentries(nino, posp, bp, count); - if (cc < 0) { -#if DIR_CVT_64 - free(bp); -#endif - return cc; - } -#if DIR_CVT_64 - ldp = (struct linux_dirent *)bp; - d64p = (struct dirent64 *)buf; - while (cc) { - namlen = strlen(ldp->ld_name); - reclen = sizeof(*d64p) - sizeof(d64p->d_name) + namlen; - if (nbytes <= reclen) - break; - d64p->d_ino = ldp->ld_ino; - d64p->d_off = nino->ni_fpos = ldp->ld_off; - d64p->d_reclen = - (((reclen + sizeof(long))) / sizeof(long)) * sizeof(long); - if (nbytes < d64p->d_reclen) - d64p->d_reclen = reclen + 1; - d64p->d_type = DT_UNKNOWN; /* you lose -- sorry. */ - (void )memcpy(d64p->d_name, ldp->ld_name, namlen); - /* - * Zero pad the rest. - */ - for (cp = d64p->d_name + namlen, n = d64p->d_reclen - reclen; - n; - n--) - *cp++ = 0; - cc -= ldp->ld_reclen; - ldp = (struct linux_dirent *)((char *)ldp + ldp->ld_reclen); - nbytes -= d64p->d_reclen; - d64p = (struct dirent64 *)((char *)d64p + d64p->d_reclen); - } - free(bp); - cc = - (d64p == (struct dirent64 *)buf && cc) - ? 
-EINVAL - : (char *)d64p - buf; -#else -#undef bp -#undef count -#endif - return cc; -} - -static int -native_inop_mkdir(struct pnode *pno, mode_t mode) -{ - char *path; - int err; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - err = syscall(SYSIO_SYS_mkdir, path, mode); - if (err != 0) - err = -errno; - free(path); - return err; -} - -static int -native_inop_rmdir(struct pnode *pno) -{ - char *path; - int err; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - err = syscall(SYSIO_SYS_rmdir, path); - if (err != 0) - err = -errno; - free(path); - return err; -} - -static int -native_inop_symlink(struct pnode *pno, const char *data) -{ - char *path; - int err; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - err = syscall(SYSIO_SYS_symlink, data, path); - if (err != 0) - err = -errno; - free(path); - return err; -} - -static int -native_inop_readlink(struct pnode *pno, char *buf, size_t bufsiz) -{ - char *path; - int i; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - i = syscall(SYSIO_SYS_readlink, path, buf, bufsiz); - if (i < 0) - i = -errno; - free(path); - return i; -} - -static int -native_inop_open(struct pnode *pno, int flags, mode_t mode) -{ - struct native_inode *nino; - char *path; - int fd; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - /* - * Whether the file is already open, or not, makes no difference. - * Want to always give the host OS a chance to authorize in case - * something has changed underneath us. - */ - if (flags & O_WRONLY) { - /* - * Promote write-only attempt to RW. - */ - flags &= ~O_WRONLY; - flags |= O_RDWR; - } -#ifdef O_LARGEFILE - flags |= O_LARGEFILE; -#endif - fd = syscall(SYSIO_SYS_open, path, flags, mode); - if (!pno->p_base->pb_ino && fd >= 0) { - struct filesys *fs; - int err; - - /* - * Success but we need to return an i-node. 
- */ - fs = pno->p_mount->mnt_fs; - err = - native_ibind(fs, - path, - _SYSIO_LOCAL_TIME() + FS2NFS(fs)->nfs_atimo, - &pno->p_base->pb_ino); - if (err) { - (void )syscall(SYSIO_SYS_close, fd); - if (err == -EEXIST) - abort(); - fd = err; - } - } - free(path); - if (fd < 0) - return -errno; - - /* - * Remember this new open. - */ - nino = I2NI(pno->p_base->pb_ino); - nino->ni_nopens++; - assert(nino->ni_nopens); - - if (nino->ni_fd >= 0) { - if ((nino->ni_oflags & O_RDWR) || - (flags & (O_RDONLY|O_WRONLY|O_RDWR)) == O_RDONLY) { - /* - * Keep existing. - */ - (void )syscall(SYSIO_SYS_close, fd); - return 0; - } - (void )syscall(SYSIO_SYS_close, nino->ni_fd); - } - /* - * Invariant; First open. Must init. - */ - nino->ni_resetfpos = 0; - nino->ni_fpos = 0; - nino->ni_fd = fd; - /* - * Need to know whether we can seek on this - * descriptor. - */ - nino->ni_seekok = - native_pos(nino->ni_fd, &nino->ni_fpos, SEEK_CUR) != 0 ? 0 : 1; - - return 0; -} - -static int -native_inop_close(struct inode *ino) -{ - struct native_inode *nino = I2NI(ino); - int err; - - if (nino->ni_fd < 0) - abort(); - - assert(nino->ni_nopens); - if (--nino->ni_nopens) { - /* - * Hmmm. We really don't need anything else. However, some - * filesystems try to implement a sync-on-close semantic. - * As this appears now, that is lost. Might want to change - * it somehow in the future? 
- */ - return 0; - } - - err = syscall(SYSIO_SYS_close, nino->ni_fd); - if (err) - return -errno; - - nino->ni_fd = -1; - nino->ni_resetfpos = 0; - nino->ni_fpos = 0; - return 0; -} - -static int -native_inop_link(struct pnode *old, struct pnode *new) -{ - int err; - char *opath, *npath; - - err = 0; - - opath = _sysio_pb_path(old->p_base, '/'); - npath = _sysio_pb_path(new->p_base, '/'); - if (!(opath && npath)) { - err = -ENOMEM; - goto out; - } - - err = syscall(SYSIO_SYS_link, opath, npath); - if (err != 0) - err = -errno; - -out: - if (opath) - free(opath); - if (npath) - free(npath); - return err; -} - -static int -native_inop_unlink(struct pnode *pno) -{ - char *path; - int err = 0; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - /* - * For this driver, unlink is easy with open files. Since the - * file remains open to the system, too, the descriptors are still - * valid. - * - * Other drivers will have some difficulty here as the entry in the - * file system name space must be removed without sacrificing access - * to the file itself. In NFS this is done with a mechanism referred - * to as a `silly delete'. The file is moved to a temporary name - * (usually .NFSXXXXXX, where the X's are replaced by the PID and some - * unique characters) in order to simulate the proper semantic. 
- */ - if (syscall(SYSIO_SYS_unlink, path) != 0) - err = -errno; - free(path); - return err; -} - -static int -native_inop_rename(struct pnode *old, struct pnode *new) -{ - int err; - char *opath, *npath; - - opath = _sysio_pb_path(old->p_base, '/'); - npath = _sysio_pb_path(new->p_base, '/'); - if (!(opath && npath)) { - err = -ENOMEM; - goto out; - } - - err = syscall(SYSIO_SYS_rename, opath, npath); - if (err != 0) - err = -errno; - -out: - if (opath) - free(opath); - if (npath) - free(npath); - return err; -} - -static ssize_t -dopio(void *buf, size_t count, _SYSIO_OFF_T off, struct native_io *nio) -{ - ssize_t cc; - - if (!nio->nio_nino->ni_seekok) { - if (off != nio->nio_nino->ni_fpos) { - /* - * They're trying to reposition. Can't - * seek on this descriptor so we err out now. - */ - errno = ESPIPE; - return -1; - } - cc = - syscall(nio->nio_op == 'r' - ? SYSIO_SYS_read - : SYSIO_SYS_write, - nio->nio_nino->ni_fd, - buf, - count); - if (cc > 0) - nio->nio_nino->ni_fpos += cc; - } else - cc = - syscall((nio->nio_op == 'r' - ? SYSIO_SYS_pread - : SYSIO_SYS_pwrite), - nio->nio_nino->ni_fd, - buf, - count, - off); - - return cc; -} - -static ssize_t -doiov(const struct iovec *iov, - int count, - _SYSIO_OFF_T off, - ssize_t limit, - struct native_io *nio) -{ - ssize_t cc; - -#if !(defined(REDSTORM) || defined(MAX_IOVEC)) -#define MAX_IOVEC INT_MAX -#endif - - - if (count <= 0) - return -EINVAL; - - /* - * Avoid the reposition call if we're already at the right place. - * Allows us to access pipes and fifos. - */ - if (off != nio->nio_nino->ni_fpos) { - int err; - - err = native_pos(nio->nio_nino->ni_fd, &off, SEEK_SET); - if (err) { - nio->nio_nino->ni_resetfpos = 1; - return err; - } - nio->nio_nino->ni_resetfpos = 0; - nio->nio_nino->ni_fpos = off; - } - - /* - * The {read,write}v is safe as this routine is only ever called - * by _sysio_enumerate_extents() and that routine is exact. It never - * passes iovectors including tails. 
- */ - cc = -#ifndef REDSTORM - count <= MAX_IOVEC - ? syscall(nio->nio_op == 'r' ? SYSIO_SYS_readv : SYSIO_SYS_writev, - nio->nio_nino->ni_fd, - iov, - count) - : -#endif - _sysio_enumerate_iovec(iov, - count, - off, - limit, - (ssize_t (*)(void *, - size_t, - _SYSIO_OFF_T, - void *))dopio, - nio); - if (cc < 0) - cc = -errno; - else - nio->nio_nino->ni_fpos += cc; - return cc; - -#if !(defined(REDSTORM) || defined(MAX_IOVEC)) -#undef MAX_IOVEC -#endif -} - -#if 0 -static int -lockop_all(struct native_inode *nino, - struct intnl_xtvec *xtv, - size_t count, - short op) -{ - struct flock flock; - int err; - - if (!count) - return -EINVAL; - flock.l_type = op; - flock.l_whence = SEEK_SET; - while (count--) { - flock.l_start = xtv->xtv_off; - flock.l_len = xtv->xtv_len; - xtv++; - err = - syscall(SYSIO_SYS_fcntl, - nino->ni_fd, - F_SETLK, - &flock); - if (err != 0) - return -errno; - } - return 0; -} - -static int -order_xtv(const struct intnl_xtvec *xtv1, const struct intnl_xtvec *xtv2) -{ - - if (xtv1->xtv_off < xtv2->xtv_off) - return -1; - if (xtv1->xtv_off > xtv2->xtv_off) - return 1; - return 0; -} -#endif - -static int -doio(char op, struct ioctx *ioctx) -{ - struct native_inode *nino; -#if 0 - int dolocks; - struct intnl_xtvec *oxtv; - int err; -#endif - struct native_io arguments; - ssize_t cc; -#if 0 - struct intnl_xtvec *front, *rear, tmp; -#endif - - nino = I2NI(ioctx->ioctx_ino); -#if 0 - dolocks = ioctx->ioctx_xtvlen > 1 && nino->ni_seekok; - if (dolocks) { - /* - * Must lock the regions (in order!) since we can't do - * strided-IO as a single atomic operation. - */ - oxtv = malloc(ioctx->ioctx_xtvlen * sizeof(struct intnl_xtvec)); - if (!oxtv) - return -ENOMEM; - (void )memcpy(oxtv, - ioctx->ioctx_xtv, - ioctx->ioctx_xtvlen * sizeof(struct intnl_xtvec)); - qsort(oxtv, - ioctx->ioctx_xtvlen, - sizeof(struct intnl_xtvec), - (int (*)(const void *, const void *))order_xtv); - err = - lockop_all(nino, - oxtv, ioctx->ioctx_xtvlen, - op == 'r' ? 
F_RDLCK : F_WRLCK); - if (err) { - free(oxtv); - return err; - } - } -#endif - arguments.nio_op = op; - arguments.nio_nino = nino; - cc = - _sysio_enumerate_extents(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen, - ioctx->ioctx_iov, ioctx->ioctx_iovlen, - (ssize_t (*)(const struct iovec *, - int, - _SYSIO_OFF_T, - ssize_t, - void *))doiov, - &arguments); -#if 0 - if (dolocks) { - /* - * Must unlock in reverse order. - */ - front = oxtv; - rear = front + ioctx->ioctx_xtvlen - 1; - while (front < rear) { - tmp = *front; - *front++ = *rear; - *rear-- = tmp; - } - if (lockop_all(nino, oxtv, ioctx->ioctx_xtvlen, F_UNLCK) != 0) - abort(); - free(oxtv); - } -#endif - if ((ioctx->ioctx_cc = cc) < 0) { - ioctx->ioctx_errno = -ioctx->ioctx_cc; - ioctx->ioctx_cc = -1; - } - return 0; -} - -static int -native_inop_read(struct inode *ino __IS_UNUSED, struct ioctx *ioctx) -{ - - return doio('r', ioctx); -} - -static int -native_inop_write(struct inode *ino __IS_UNUSED, struct ioctx *ioctx) -{ - - return doio('w', ioctx); -} - -static _SYSIO_OFF_T -native_inop_pos(struct inode *ino, _SYSIO_OFF_T off) -{ - struct native_inode *nino = I2NI(ino); - int err; - - err = native_pos(nino->ni_fd, &off, SEEK_SET); - return err < 0 ? err : off; -} - -static int -native_inop_iodone(struct ioctx *ioctxp __IS_UNUSED) -{ - - /* - * It's always done in this driver. It completed when posted. 
- */ - return 1; -} - -static int -native_inop_fcntl(struct inode *ino, - int cmd, - va_list ap, - int *rtn) -{ - struct native_inode *nino = I2NI(ino); - long arg; - int err; - - if (nino->ni_fd < 0) - abort(); - - err = 0; - switch (cmd) { - case F_GETFD: - case F_GETFL: -#ifdef F_GETOWN - case F_GETOWN: -#endif - *rtn = syscall(SYSIO_SYS_fcntl, nino->ni_fd, cmd); - if (*rtn == -1) - err = -errno; - break; - case F_DUPFD: - case F_SETFD: - case F_SETFL: - case F_GETLK: - case F_SETLK: - case F_SETLKW: -#ifdef F_SETOWN - case F_SETOWN: -#endif - arg = va_arg(ap, long); - *rtn = syscall(SYSIO_SYS_fcntl, nino->ni_fd, cmd, arg); - if (*rtn == -1) - err = -errno; - break; - default: - *rtn = -1; - err = -EINVAL; - } - return err; -} - -static int -native_inop_mknod(struct pnode *pno __IS_UNUSED, - mode_t mode __IS_UNUSED, - dev_t dev __IS_UNUSED) -{ - - return -ENOSYS; -} - -#ifdef _HAVE_STATVFS -static int -native_inop_statvfs(struct pnode *pno, - struct inode *ino, - struct intnl_statvfs *buf) -{ - char *path; - int rc; - struct statfs fs; - - path = NULL; - if (!ino || I2NI(ino)->ni_fd < 0) { - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - } - - /* - * The syscall interface does not support SYSIO_SYS_fstatvfs. - * Should possibly return ENOSYS, but thought it - * better to use SYSIO_SYS_fstatfs and fill in as much of - * the statvfs structure as possible. This allows - * for more of a test of the sysio user interface. - */ - rc = - path - ? 
syscall(SYSIO_SYS_statfs, path, &fs) - : syscall(SYSIO_SYS_fstatfs, I2NI(ino)->ni_fd, &fs); - if (path) - free(path); - if (rc < 0) - return -errno; - - buf->f_bsize = fs.f_bsize; /* file system block size */ - buf->f_frsize = fs.f_bsize; /* file system fundamental block size */ - buf->f_blocks = fs.f_blocks; - buf->f_bfree = fs.f_bfree; - buf->f_bavail = fs.f_bavail; - buf->f_files = fs.f_files; /* Total number serial numbers */ - buf->f_ffree = fs.f_ffree; /* Number free serial numbers */ - buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/ - buf->f_fsid = fs.f_fsid.__val[1]; - buf->f_flag = 0; /* No equiv in statfs; maybe use type? */ - buf->f_namemax = fs.f_namelen; - return 0; -} -#endif - -static int -native_inop_sync(struct inode *ino) -{ - int err; - - assert(I2NI(ino)->ni_fd >= 0); - - err = syscall(SYSIO_SYS_fsync, I2NI(ino)->ni_fd); - if (err) - err = -errno; - return err; -} - -static int -native_inop_datasync(struct inode *ino) -{ - struct native_inode *nino; - int err; - - nino = I2NI(ino); - assert(nino->ni_fd >= 0); - -#ifdef SYSIO_SYS_fdatasync - err = syscall(SYSIO_SYS_fdatasync, I2NI(ino)->ni_fd); -#else -#if 0 -#warning No fdatasync system call -- Using fsync instead! -#endif - err = syscall(SYSIO_SYS_fsync, I2NI(ino)->ni_fd); -#endif - if (err) - err = -errno; - return err; -} - -#ifdef HAVE_LUSTRE_HACK -static int -native_inop_ioctl(struct inode *ino, - unsigned long int request, - va_list ap) -{ - struct native_inode *nino; - long arg1, arg2, arg3, arg4; - int rtn; - - nino = I2NI(ino); - assert(nino->ni_fd >= 0); - arg1 = va_arg(ap, long); - arg2 = va_arg(ap, long); - arg3 = va_arg(ap, long); - arg4 = va_arg(ap, long); - - rtn = - syscall(SYSIO_SYS_ioctl, I2NI(ino)->ni_fd, request, - arg1, arg2, arg3, arg4); - if (rtn < 0) - rtn = -errno; - return rtn; -} -#else -static int -native_inop_ioctl(struct inode *ino __IS_UNUSED, - unsigned long int request __IS_UNUSED, - va_list ap __IS_UNUSED) -{ - - /* - * I'm lazy. 
Maybe implemented later. - */ - return -ENOTTY; -} -#endif - -static void -native_inop_gone(struct inode *ino) -{ - struct native_inode *nino = I2NI(ino); - - if (nino->ni_fd >= 0) - (void )syscall(SYSIO_SYS_close, nino->ni_fd); - - free(ino->i_private); -} - -static void -native_fsop_gone(struct filesys *fs __IS_UNUSED) -{ - - free(fs->fs_private); - /* - * Do nothing. There is no private part maintained for the - * native file interface. - */ -} diff --git a/libsysio/drivers/native/fs_native.h b/libsysio/drivers/native/fs_native.h deleted file mode 100644 index 1590379d195be916074f99004f27ec5cb4d0e413..0000000000000000000000000000000000000000 --- a/libsysio/drivers/native/fs_native.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Native file system driver support. - */ - -extern int _sysio_native_init(void); diff --git a/libsysio/drivers/native/module.mk b/libsysio/drivers/native/module.mk deleted file mode 100644 index 8cada8a86f7d12487cda634fba1d69ee2c2be318..0000000000000000000000000000000000000000 --- a/libsysio/drivers/native/module.mk +++ /dev/null @@ -1,2 +0,0 @@ -NATIVE_SRCS = drivers/native/fs_native.c -NATIVE_EXTRA = drivers/native/fs_native.h drivers/native/module.mk diff --git a/libsysio/drivers/sockets/.cvsignore b/libsysio/drivers/sockets/.cvsignore deleted file mode 100644 index ec96903b9d05c45b7fb9e6f057c456661be09b81..0000000000000000000000000000000000000000 --- a/libsysio/drivers/sockets/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.deps -.dirstamp diff --git a/libsysio/drivers/sockets/module.mk b/libsysio/drivers/sockets/module.mk deleted file mode 100644 index 261fcfaa66b9c7d1bf8a2741a33497494213a3ef..0000000000000000000000000000000000000000 --- a/libsysio/drivers/sockets/module.mk +++ /dev/null @@ -1,2 +0,0 @@ -SOCKETS_SRCS = drivers/sockets/sockets.c -SOCKETS_EXTRA = drivers/sockets/module.mk diff --git a/libsysio/drivers/sockets/sockets.c b/libsysio/drivers/sockets/sockets.c deleted file mode 100644 index d311ae1b7a3d73245e813c6f593870493ce24a12..0000000000000000000000000000000000000000 --- a/libsysio/drivers/sockets/sockets.c +++ /dev/null @@ -1,633 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifdef __linux__ -#define _BSD_SOURCE -#endif - -#include <stdio.h> /* for NULL */ -#include <stdlib.h> -#ifdef __linux__ -#include <string.h> -#endif -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <syscall.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/fcntl.h> -#include <sys/syscall.h> -#include <sys/socket.h> -#ifdef __linux__ -#include <linux/net.h> -#endif -#include <sys/uio.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" -#include "native.h" -#include "fs.h" -#include "inode.h" -#include "file.h" -#include "dev.h" /* _sysio_nodev_ops */ - -/* - * Sockets interface driver - */ - -/* - * Sockets file identifiers format. - */ -struct sockets_ino_identifier { - ino_t inum; /* i-number */ -}; - -/* - * Driver-private i-node information we keep about in-use sockets. - */ -struct socket_info { - struct sockets_ino_identifier ski_ident; /* unique identifier */ - struct file_identifier ski_fileid; /* ditto */ - int ski_fd; /* host fildes */ -}; - -static int sockets_inop_close(struct inode *ino); -static int sockets_inop_read(struct inode *ino, - struct ioctx *ioctx); -static int sockets_inop_write(struct inode *ino, - struct ioctx *ioctxp); -static _SYSIO_OFF_T sockets_inop_pos(struct inode *ino, - _SYSIO_OFF_T off); -static int sockets_inop_iodone(struct ioctx *ioctx); -static int sockets_inop_sync(struct inode *ino); -static int sockets_inop_datasync(struct inode *ino); -static int sockets_inop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn); -static int sockets_inop_ioctl(struct inode *ino, - unsigned long int request, - va_list ap); -static void sockets_inop_gone(struct inode *ino); -static void sockets_illop(void); - -/* - * Given i-node, return driver private part. 
- */ -#define I2SKI(ino) ((struct socket_info *)((ino)->i_private)) - -struct filesys_ops sockets_filesys_ops = { - (void (*)(struct filesys *))sockets_illop -}; - -static struct filesys *sockets_fs = NULL; - -static struct inode_ops sockets_i_ops; - -/* - * Initialize this driver. - */ -int -_sysio_sockets_init() -{ - - assert(!sockets_fs); - - sockets_i_ops = _sysio_nodev_ops; - sockets_i_ops.inop_close = sockets_inop_close; - sockets_i_ops.inop_read = sockets_inop_read; - sockets_i_ops.inop_write = sockets_inop_write; - sockets_i_ops.inop_pos = sockets_inop_pos; - sockets_i_ops.inop_iodone = sockets_inop_iodone; - sockets_i_ops.inop_fcntl = sockets_inop_fcntl; - sockets_i_ops.inop_sync = sockets_inop_sync; - sockets_i_ops.inop_datasync = sockets_inop_datasync; - sockets_i_ops.inop_ioctl = sockets_inop_ioctl; - sockets_i_ops.inop_gone = sockets_inop_gone; - - sockets_fs = _sysio_fs_new(&sockets_filesys_ops, 0, NULL); - if (!sockets_fs) - return -ENOMEM; - - return 0; -} - -static int -sockets_inop_close(struct inode *ino) -{ - struct socket_info *ski = I2SKI(ino); - int err; - - if (ski->ski_fd < 0) - return -EBADF; - - err = syscall(SYSIO_SYS_close, ski->ski_fd); - if (err) - return -errno; - ski->ski_fd = -1; - return 0; -} - -/* - * A helper function performing the real IO operation work. - * - * We don't really have async IO. We'll just perform the function - * now. - */ -static int -doio(ssize_t (*f)(int, const struct iovec *, int), - struct inode *ino, - struct ioctx *ioctx) -{ - struct socket_info *ski = I2SKI(ino); - - assert(ski->ski_fd >= 0); - - /* XXX there's no way to check the position - * here we only could ingore the extends - */ - if (ioctx->ioctx_xtvlen != 1) - return -EINVAL; - - if (ioctx->ioctx_iovlen && (int) ioctx->ioctx_iovlen < 0) - return -EINVAL; - - /* - * Call the appropriate (read/write) IO function to - * transfer the data now. 
- */ - ioctx->ioctx_cc = - (*f)(ski->ski_fd, ioctx->ioctx_iov, ioctx->ioctx_iovlen); - if (ioctx->ioctx_cc < 0) - ioctx->ioctx_errno = errno; - - ioctx->ioctx_done = 1; - return 0; -} - -/* - * Helper function passed to doio(), above, to accomplish a real readv. - */ -static ssize_t -_readv(int fd, const struct iovec *vector, int count) -{ - - return syscall(SYSIO_SYS_readv, fd, vector, count); -} - -static int -sockets_inop_read(struct inode *ino, - struct ioctx *ioctx) -{ - - return doio(_readv, ino, ioctx); -} - -/* - * Helper function passed to doio(), above, to accomplish a real writev. - */ -static ssize_t -_writev(int fd, const struct iovec *vector, int count) -{ - - return syscall(SYSIO_SYS_writev, fd, vector, count); -} - -static int -sockets_inop_write(struct inode *ino, - struct ioctx *ioctx) -{ - - return doio(_writev, ino, ioctx); -} - -static _SYSIO_OFF_T -sockets_inop_pos(struct inode *ino __IS_UNUSED, _SYSIO_OFF_T off __IS_UNUSED) -{ - return -EINVAL; -} - -static int -sockets_inop_iodone(struct ioctx *ioctxp __IS_UNUSED) -{ - - /* - * It's always done in this driver. It completed when posted. - */ - return 1; -} - -static int -sockets_inop_fcntl(struct inode *ino __IS_UNUSED, - int cmd __IS_UNUSED, - va_list ap __IS_UNUSED, - int *rtn) -{ - long arg; - - assert(I2SKI(ino)->ski_fd >= 0); - - switch (cmd) { - case F_GETFD: - case F_GETFL: - case F_GETOWN: - *rtn = syscall(SYSIO_SYS_fcntl, I2SKI(ino)->ski_fd, cmd); - break; - case F_DUPFD: - case F_SETFD: - case F_SETFL: - case F_GETLK: - case F_SETLK: - case F_SETLKW: - case F_SETOWN: - arg = va_arg(ap, long); - *rtn = syscall(SYSIO_SYS_fcntl, I2SKI(ino)->ski_fd, cmd, arg); - break; - default: - *rtn = -1; - errno = EINVAL; - } - return *rtn == -1 ? 
-errno : 0; -} - -static int -sockets_inop_sync(struct inode *ino) -{ - - assert(I2SKI(ino)->ski_fd >= 0); - - return syscall(SYSIO_SYS_fsync, I2SKI(ino)->ski_fd); -} - -static int -sockets_inop_datasync(struct inode *ino) -{ - - assert(I2SKI(ino)->ski_fd >= 0); - - return syscall(SYSIO_SYS_fdatasync, I2SKI(ino)->ski_fd); -} - -#ifdef HAVE_LUSTRE_HACK -/* - * we blindly extract 4 params and pass to host kernel, the stack - * should be ok. hope no ioctl will consume more then 4 params... - */ -static int -sockets_inop_ioctl(struct inode *ino, - unsigned long int request, - va_list ap) -{ - long arg1, arg2, arg3, arg4; - - assert(I2SKI(ino)->ski_fd >= 0); - - arg1 = va_arg(ap, long); - arg2 = va_arg(ap, long); - arg3 = va_arg(ap, long); - arg4 = va_arg(ap, long); - - return syscall(SYSIO_SYS_ioctl, I2SKI(ino)->ski_fd, request, - arg1, arg2, arg3, arg4); -} -#else -static int -sockets_inop_ioctl(struct inode *ino __IS_UNUSED, - unsigned long int request __IS_UNUSED, - va_list ap __IS_UNUSED) -{ - /* - * I'm lazy. Maybe implemented later. 
- */ - return -ENOTTY; -} -#endif - -static void -sockets_inop_gone(struct inode *ino) -{ - - (void )sockets_inop_close(ino); - free(ino->i_private); -} - -static void -sockets_illop(void) -{ - - abort(); -} - -static struct inode * -_sysio_sockets_inew() -{ - static ino_t inum = 1; - struct socket_info *ski; - struct inode *ino; - static struct intnl_stat zero_stat; - - ski = malloc(sizeof(struct socket_info)); - if (!ski) - return NULL; - ski->ski_ident.inum = inum++; - ski->ski_fileid.fid_data = &ski->ski_ident; - ski->ski_fileid.fid_len = sizeof(ski->ski_ident); - ski->ski_fd = -1; - - ino = - _sysio_i_new(sockets_fs, - &ski->ski_fileid, - &zero_stat, - 0, - &sockets_i_ops, - ski); - if (!ino) - free(ski); - - return ino; -} - -int -SYSIO_INTERFACE_NAME(socket)(int domain, int type, int protocol) -{ - int err; - struct inode *ino; - struct socket_info *ski; - struct file *fil; - - err = 0; - fil = NULL; - - ino = _sysio_sockets_inew(); - if (!ino) { - err = -ENOMEM; - goto error; - } - - ski = I2SKI(ino); -#ifndef SYSIO_SYS_socketcall - ski->ski_fd = syscall(SYSIO_SYS_socket, domain, type, protocol); -#else - { - unsigned long avec[3] = {domain, type, protocol}; - ski->ski_fd = - syscall(SYSIO_SYS_socketcall, SYS_SOCKET, avec); - } -#endif - if (ski->ski_fd < 0) { - err = -errno; - goto error; - } - - fil = _sysio_fnew(ino, O_RDWR); - if (!fil) { - err = -ENOMEM; - goto error; - } - -#ifdef HAVE_LUSTRE_HACK - err = _sysio_fd_set(fil, ski->ski_fd, 1); -#else - err = _sysio_fd_set(fil, -1, 0); -#endif - if (err < 0) - goto error; - - return err; - -error: - if (fil) - F_RELE(fil); - if (ino) - I_RELE(ino); - - errno = -err; - return -1; -} - -int -SYSIO_INTERFACE_NAME(accept)(int s, struct sockaddr *addr, socklen_t *addrlen) -{ - int err; - struct inode *ino; - struct socket_info *ski; - struct file *ofil, *nfil; - - err = 0; - nfil = NULL; - ino = NULL; - - ofil = _sysio_fd_find(s); - if (!ofil) { - err = -EBADF; - goto error; - } - - ino = 
_sysio_sockets_inew(); - if (!ino) { - err = -ENOMEM; - goto error; - } - - nfil = _sysio_fnew(ino, O_RDWR); - if (!nfil) { - err = -ENOMEM; - goto error; - } - - ski = I2SKI(ino); -#ifndef SYSIO_SYS_socketcall - ski->ski_fd = - syscall(SYSIO_SYS_accept, - I2SKI(ofil->f_ino)->ski_fd, - addr, - addrlen); -#else - { - unsigned long avec[3] = { - (unsigned long) I2SKI(ofil->f_ino)->ski_fd, - (unsigned long) addr, - (unsigned long) addrlen}; - ski->ski_fd = - syscall(SYSIO_SYS_socketcall, SYS_ACCEPT, avec); - } -#endif - if (ski->ski_fd < 0) { - err = -errno; - goto error; - } - -#ifdef HAVE_LUSTRE_HACK - err = _sysio_fd_set(nfil, ski->ski_fd, 1); -#else - err = _sysio_fd_set(nfil, -1, 0); -#endif - if (err < 0) - goto error; - - return err; - -error: - if (nfil) - F_RELE(nfil); - if (ino) - I_RELE(ino); - - errno = -err; - return -1; -} - -int -SYSIO_INTERFACE_NAME(bind)(int sockfd, - const struct sockaddr *my_addr, - socklen_t addrlen) -{ - int err; - struct file *fil; -#ifdef SYSIO_SYS_socketcall - unsigned long avec[3]; -#endif - - err = 0; - - fil = _sysio_fd_find(sockfd); - if (!fil) { - err = -EBADF; - goto out; - } - -#ifndef SYSIO_SYS_socketcall - if (syscall(SYSIO_SYS_bind, - I2SKI(fil->f_ino)->ski_fd, - my_addr, - addrlen)) { -#else - avec[0] = I2SKI(fil->f_ino)->ski_fd; - avec[1] = (unsigned long )my_addr; - avec[2] = addrlen; - if (syscall(SYSIO_SYS_socketcall, SYS_BIND, avec) != 0) { -#endif - err = -errno; - goto out; - } - - return 0; -out: - errno = -err; - return -1; -} - -int -SYSIO_INTERFACE_NAME(listen)(int s, int backlog) -{ - int err; - struct file *fil; -#ifdef SYSIO_SYS_socketcall - unsigned long avec[2]; -#endif - - err = 0; - - fil = _sysio_fd_find(s); - if (!fil) { - err = -EBADF; - goto out; - } - -#ifndef SYSIO_SYS_socketcall - if (syscall(SYSIO_SYS_listen, - I2SKI(fil->f_ino)->ski_fd, - backlog) != 0) { -#else - avec[0] = I2SKI(fil->f_ino)->ski_fd; - avec[1] = backlog; - if (syscall(SYSIO_SYS_socketcall, SYS_LISTEN, avec) != 0) { -#endif 
- err = -errno; - goto out; - } - - return 0; -out: - errno = -err; - return -1; -} - -int -SYSIO_INTERFACE_NAME(connect)(int sockfd, - const struct sockaddr *serv_addr, - socklen_t addrlen) -{ - int err; - struct file *fil; -#ifdef SYSIO_SYS_socketcall - unsigned long avec[3]; -#endif - - err = 0; - - fil = _sysio_fd_find(sockfd); - if (!fil) { - err = -EBADF; - goto out; - } - -#ifndef SYSIO_SYS_socketcall - if (syscall(SYSIO_SYS_connect, - I2SKI(fil->f_ino)->ski_fd, - serv_addr, - addrlen) != 0) { -#else - avec[0] = I2SKI(fil->f_ino)->ski_fd; - avec[1] = (unsigned long )serv_addr; - avec[2] = addrlen; - if (syscall(SYSIO_SYS_socketcall, SYS_CONNECT, avec) != 0) { -#endif - err = -errno; - goto out; - } - - return 0; -out: - errno = -err; - return -1; -} diff --git a/libsysio/drivers/yod/.cvsignore b/libsysio/drivers/yod/.cvsignore deleted file mode 100644 index ec96903b9d05c45b7fb9e6f057c456661be09b81..0000000000000000000000000000000000000000 --- a/libsysio/drivers/yod/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.deps -.dirstamp diff --git a/libsysio/drivers/yod/fs_yod.c b/libsysio/drivers/yod/fs_yod.c deleted file mode 100644 index 6750ffb97eb671f316f9c9973293ab9714b8aaff..0000000000000000000000000000000000000000 --- a/libsysio/drivers/yod/fs_yod.c +++ /dev/null @@ -1,1252 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifdef __linux__ -#define _BSD_SOURCE -#endif - -#include <stdio.h> /* for NULL */ -#include <stdlib.h> -#ifdef __linux__ -#include <string.h> -#endif -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <syscall.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/fcntl.h> -#if 0 -#include <sys/vfs.h> -#endif -#ifdef _HAVE_STATVFS -#include <sys/statvfs.h> -#endif -#include <utime.h> -#include <sys/queue.h> - -#include "xtio.h" -#include "sysio.h" -#include "fs.h" -#include "mount.h" -#include "inode.h" - -#include "fs_yod.h" - -/* - * Remote file system driver - * calls are re-directed to the initiating yod - */ -#include "cplant-yod.h" - -/* stat struct used by yod, which - * is not compiled with __USE_FILE_OFFSET64 - */ -#define __yod_stat stat -#ifdef ALPHA_LINUX -#define COPY_STAT(src, dest) \ -do { \ - memset((dest), 0, sizeof((*dest))); \ - (dest)->st_dev = (src)->st_dev; \ - (dest)->st_ino = (src)->st_ino; \ - (dest)->st_mode = (src)->st_mode; \ - (dest)->st_nlink = 
(src)->st_nlink; \ - (dest)->st_uid = (src)->st_uid; \ - (dest)->st_gid = (src)->st_gid; \ - (dest)->st_rdev = (src)->st_rdev; \ - (dest)->st_size = (src)->st_size; \ - (dest)->st_atime = (src)->st_atime; \ - (dest)->st_mtime = (src)->st_mtime; \ - (dest)->st_ctime = (src)->st_ctime; \ - (dest)->st_blksize = (src)->st_blksize; \ - (dest)->st_blocks = (src)->st_blocks; \ - (dest)->st_flags = (src)->st_flags; \ - (dest)->st_gen = (src)->st_gen; \ -} while (0); -#else -#define COPY_STAT(src, dest) \ -do { \ - memset((dest), 0, sizeof((*dest))); \ - (dest)->st_dev = (src)->st_dev; \ - (dest)->st_ino = (src)->st_ino; \ - (dest)->st_mode = (src)->st_mode; \ - (dest)->st_nlink = (src)->st_nlink; \ - (dest)->st_uid = (src)->st_uid; \ - (dest)->st_gid = (src)->st_gid; \ - (dest)->st_rdev = (src)->st_rdev; \ - (dest)->st_size = (src)->st_size; \ - (dest)->st_atime = (src)->st_atime; \ - (dest)->st_mtime = (src)->st_mtime; \ - (dest)->st_ctime = (src)->st_ctime; \ - (dest)->st_blksize = (src)->st_blksize; \ - (dest)->st_blocks = (src)->st_blocks; \ -} while (0); -#endif - -/* - * Yod file identifiers format. - */ -struct yod_inode_identifier { - dev_t dev; /* device number */ - ino_t ino; /* i-number */ -#ifdef HAVE_GENERATION - unsigned int gen; /* generation number */ -#endif -}; - -/* - * Driver-private i-node information we keep about local host file - * system objects. - */ -struct yod_inode { - unsigned ni_seekok : 1; /* can seek? 
*/ - struct yod_inode_identifier ni_ident; /* unique identifier */ - struct file_identifier ni_fileid; /* ditto */ - int ni_fd; /* host fildes */ - int ni_oflags; /* flags, from open */ - unsigned ni_nopens; /* soft ref count */ - _SYSIO_OFF_T ni_fpos; /* current pos */ -}; - -static int yod_inop_lookup(struct pnode *pno, - struct inode **inop, - struct intent *intnt, - const char *path); -static int yod_inop_getattr(struct pnode *pno, - struct inode *ino, - struct intnl_stat *stbuf); -static int yod_inop_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf); -static ssize_t yod_filldirentries(struct inode *ino, - off64_t *posp, - char *buf, - size_t nbytes); -static int yod_inop_mkdir(struct pnode *pno, mode_t mode); -static int yod_inop_rmdir(struct pnode *pno); -static int yod_inop_symlink(struct pnode *pno, const char *data); -static int yod_inop_readlink(struct pnode *pno, char *buf, size_t bufsiz); -static int yod_inop_open(struct pnode *pno, int flags, mode_t mode); -static int yod_inop_close(struct inode *ino); -static int yod_inop_link(struct pnode *old, struct pnode *new); -static int yod_inop_unlink(struct pnode *pno); -static int yod_inop_rename(struct pnode *old, struct pnode *new); -static _SYSIO_OFF_T yod_inop_pos (struct inode *ino, _SYSIO_OFF_T off); -static int yod_inop_read(struct inode *ino, struct ioctx *ioctx); -static int yod_inop_write(struct inode *ino, struct ioctx *ioctx); -static int yod_inop_iodone(struct ioctx *ioctx); -static int yod_inop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn); -static int yod_inop_sync(struct inode *ino); -static int yod_inop_datasync(struct inode *ino); -static int yod_inop_ioctl(struct inode *ino, - unsigned long int request, - va_list ap); -static int yod_inop_mknod(struct pnode *pno, mode_t mode, dev_t dev); -#ifdef _HAVE_STATVFS -static int yod_inop_statvfs(struct pnode *pno, - struct inode *ino, - struct intnl_statvfs *buf); -#endif -static void 
yod_inop_gone(struct inode *ino); - -static struct inode_ops yod_i_ops = { - yod_inop_lookup, - yod_inop_getattr, - yod_inop_setattr, - yod_filldirentries, - yod_inop_mkdir, - yod_inop_rmdir, - yod_inop_symlink, - yod_inop_readlink, - yod_inop_open, - yod_inop_close, - yod_inop_link, - yod_inop_unlink, - yod_inop_rename, - yod_inop_read, - yod_inop_write, - yod_inop_pos, - yod_inop_iodone, - yod_inop_fcntl, - yod_inop_sync, - yod_inop_datasync, - yod_inop_ioctl, - yod_inop_mknod, -#ifdef _HAVE_STATVFS - yod_inop_statvfs, -#endif - yod_inop_gone -}; - -static int yod_fsswop_mount(const char *source, - unsigned flags, - const void *data, - struct pnode *tocover, - struct mount **mntp); - -static struct fssw_ops yod_fssw_ops = { - yod_fsswop_mount -}; - -static void yod_fsop_gone(struct filesys *fs); - -static struct filesys_ops yod_inodesys_ops = { - yod_fsop_gone -}; - -/* - * Placeholder internal mount as in native driver - */ -static struct mount *yod_internal_mount = NULL; - -/* - * Given i-node, return driver private part. - */ -#define I2NI(ino) ((struct yod_inode *)((ino)->i_private)) - -/* - * stat -- by path. - */ -static int -yod_stat(const char *path, struct intnl_stat *buf) -{ - int err; - struct __yod_stat stbuf; - - err = stat_yod(path, &stbuf); - if (err) - err = -errno; - COPY_STAT(&stbuf, buf); - - return err; -} - -/* - * stat -- by fildes - */ -static int -yod_fstat(int fd, struct intnl_stat *buf) -{ - int err; - struct __yod_stat stbuf; - - err = fstat_yod(fd, &stbuf); - if (err) - err = -errno; - COPY_STAT(&stbuf, buf); - - return err; -} - -/* - * Introduce an i-node to the system. 
- */ -static struct inode * -yod_i_new(struct filesys *fs, struct intnl_stat *buf) -{ - struct yod_inode *nino; - struct inode *ino; - - nino = malloc(sizeof(struct yod_inode)); - if (!nino) - return NULL; - bzero(&nino->ni_ident, sizeof(nino->ni_ident)); - nino->ni_seekok = 0; - nino->ni_ident.dev = buf->st_dev; - nino->ni_ident.ino = buf->st_ino; -#ifdef HAVE_GENERATION - nino->ni_ident.gen = buf->st_gen; -#endif - nino->ni_fileid.fid_data = &nino->ni_ident; - nino->ni_fileid.fid_len = sizeof(nino->ni_ident); - nino->ni_fd = -1; - nino->ni_oflags = 0; - nino->ni_nopens = 0; - nino->ni_fpos = 0; - ino = - _sysio_i_new(fs, - &nino->ni_fileid, - buf, - 0, - &yod_i_ops, - nino); - if (!ino) - free(nino); - return ino; -} - -/* - * Initialize this driver. - */ -int -_sysio_yod_init() -{ - - /* - * Capture current process umask and reset our process umask to - * zero. All permission bits to open/creat/setattr are absolute -- - * They've already had a umask applied, when appropriate. - */ - _sysio_umask = syscall(SYS_umask, 0); - - return _sysio_fssw_register("yod", &yod_fssw_ops); -} - -/* - * Create private, internal, view of the hosts name space. - */ -static int -create_internal_namespace() -{ - int err; - struct mount *mnt; - struct inode *rootino; - struct pnode_base *rootpb; - static struct qstr noname = { NULL, 0, 0 }; - struct filesys *fs; - struct intnl_stat stbuf; - - if (yod_internal_mount) { - /* - * Reentered! - */ - abort(); - } - - /* - * We maintain an artificial, internal, name space in order to - * have access to fully qualified path names in the various routines. - * Initialize that name space now. - */ - mnt = NULL; - rootino = NULL; - rootpb = NULL; - fs = _sysio_fs_new(&yod_inodesys_ops, 0, NULL); - if (!fs) { - err = -ENOMEM; - goto error; - } - - /* - * Get root i-node. 
- */ - err = yod_stat("/", &stbuf); - if (err) - goto error; - rootino = yod_i_new(fs, &stbuf); - if (!rootino) { - err = -ENOMEM; - goto error; - } - - /* - * Generate base path-node for root. - */ - rootpb = _sysio_pb_new(&noname, NULL, rootino); - if (!rootpb) { - err = -ENOMEM; - goto error; - } - - /* - * Mount it. This name space is disconnected from the - * rest of the system -- Only available within this driver. - */ - err = _sysio_do_mount(fs, rootpb, 0, NULL, &mnt); - if (err) - goto error; - - yod_internal_mount = mnt; - return 0; -error: - if (mnt) { - if (_sysio_do_unmount(mnt) != 0) - abort(); - fs = NULL; - rootpb = NULL; - rootino = NULL; - } - if (rootpb) - _sysio_pb_gone(rootpb); - if (fs) { - FS_RELE(fs); - } - - return err; -} - -static int -yod_fsswop_mount(const char *source, - unsigned flags, - const void *data __IS_UNUSED, - struct pnode *tocover, - struct mount **mntp) -{ - int err; - struct nameidata nameidata; - struct mount *mnt; - - /* - * Caller must use fully qualified path names when specifying - * the source. - */ - if (*source != '/') - return -ENOENT; - - if (!yod_internal_mount) { - err = create_internal_namespace(); - if (err) - return err; - } - - /* - * Lookup the source in the internally maintained name space. - */ - ND_INIT(&nameidata, 0, source, yod_internal_mount->mnt_root, NULL); - err = _sysio_path_walk(yod_internal_mount->mnt_root, &nameidata); - if (err) - return err; - - /* - * Have path-node specified by the given source argument. Let the - * system finish the job, now. - */ - err = - _sysio_do_mount(yod_internal_mount->mnt_fs, - nameidata.nd_pno->p_base, - flags, - tocover, - &mnt); - /* - * Release the internal name space pnode and clean up any - * aliases we might have generated. We really don't need to cache them - * as they are only used at mount time.. 
- */ - P_RELE(nameidata.nd_pno); - (void )_sysio_p_prune(yod_internal_mount->mnt_root); - - if (!err) { - FS_REF(yod_internal_mount->mnt_fs); - *mntp = mnt; - } - return err; -} - -static int -yod_i_invalid(struct inode *inop, struct intnl_stat *stat) -{ - /* - * Validate passed in inode against stat struct info - */ - struct yod_inode *nino = I2NI(inop); - - if ((nino->ni_ident.dev != stat->st_dev || - nino->ni_ident.ino != stat->st_ino || -#ifdef HAVE_GENERATION - nino->ni_ident.gen != stat->st_gen || -#endif - ((inop)->i_stbuf.st_mode & S_IFMT) != (stat->st_mode & S_IFMT)) || - (((inop)->i_stbuf.st_rdev != stat->st_rdev) && - (S_ISCHR((inop)->i_stbuf.st_mode) || - S_ISBLK((inop)->i_stbuf.st_mode)))) - return 1; - - return 0; -} - -/* - * Find, and validate, or create i-node by host-relative path. Returned i-node - * is referenced. - */ -static int -yod_iget(struct filesys *fs, - const char *path, - struct inode **inop, - int forced) -{ - int err; - struct inode *ino; - struct intnl_stat stbuf; - struct yod_inode_identifier ident; - struct file_identifier fileid; - - /* - * Get file status. - */ - err = yod_stat(path, &stbuf); - if (err) { - *inop = NULL; - return err; - } - - /* - * Validate? - */ - if (*inop) { - if (!yod_i_invalid(*inop, &stbuf)) - return 0; - /* - * Invalidate. - */ - *inop = NULL; - } - - /* - * I-node is not already known. Find or create it. - */ - bzero(&ident, sizeof(ident)); - ident.dev = stbuf.st_dev; - ident.ino = stbuf.st_ino; -#ifdef HAVE_GENERATION - ident.gen = stbuf.st_gen; -#endif - fileid.fid_data = &ident; - fileid.fid_len = sizeof(ident); - ino = _sysio_i_find(fs, &fileid); - if (ino && forced) { - /* - * Insertion was forced but it's already present! 
- */ - if (yod_i_invalid(ino, &stbuf)) { - /* - * Cached inode has stale attrs - * make way for the new one - */ - I_RELE(ino); - _sysio_i_undead(ino); - ino = NULL; - } else - /* - * OK to reuse cached inode - */ - goto out; - } - - if (!ino) { - ino = yod_i_new(fs, &stbuf); - if (!ino) - err = -ENOMEM; - } -out: - if (!err) - *inop = ino; - return err; -} - -/* - * Look up named object in host's name space by path. - */ -static int -yod_path_lookup(struct filesys *fs, const char *path, struct inode **inop) -{ - - return yod_iget(fs, path, inop, 0); -} - -/* - * Look up object by it's path node. - */ -static int -yod_i_lookup(struct filesys *fs, struct pnode_base *pb, struct inode **inop) -{ - int err; - char *path; - - path = _sysio_pb_path(pb, '/'); - if (!path) - return -ENOMEM; - err = yod_path_lookup(fs, path, inop); - free(path); - return err; -} - -static int -yod_inop_lookup(struct pnode *pno, - struct inode **inop, - struct intent *intnt __IS_UNUSED, - const char *path __IS_UNUSED) -{ - int err; - - *inop = pno->p_base->pb_ino; - - /* - * Don't have an inode yet. Because we translate everything back to - * a single name space for the host, we will assume the object the - * caller is looking for has no existing alias in our internal - * name space. We don't see the same file on different mounts in the - * underlying host FS as the same file. - * - * The file identifier *will* be unique. It's got to have a different - * dev. - */ - err = yod_i_lookup(pno->p_mount->mnt_fs, pno->p_base, inop); - if (err) - *inop = NULL; - return err; -} - -static int -yod_inop_getattr(struct pnode *pno, struct inode *ino, struct intnl_stat *stbuf) -{ - char *path; - int err; - - path = NULL; - if (!ino || I2NI(ino)->ni_fd < 0) { - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - } - err = - path - ? 
yod_stat(path, stbuf) - : yod_fstat(I2NI(ino)->ni_fd, stbuf); - if (path) - free(path); - return err; -} - -static int -yod_inop_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf) -{ - char *path; - int fd; - struct intnl_stat st; - int err; - - path = NULL; - fd = ino ? I2NI(ino)->ni_fd : -1; - if (fd < 0 || mask & (SETATTR_MTIME|SETATTR_ATIME)) { - if (!pno) - return -EEXIST; - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - } - - /* - * Get current status for undo. - */ - err = - fd < 0 - ? yod_stat(path, &st) - : yod_fstat(fd, &st); - if (err) - goto out; - - if (mask & SETATTR_MODE) { - mode_t mode; - - /* - * Alter permissions attribute. - */ - mode = stbuf->st_mode & 07777; - err = chmod_yod(path, mode); - } - if (err) - mask &= ~SETATTR_MODE; - - if (mask & (SETATTR_UID|SETATTR_GID)) { - - /* - * Alter owner and/or group identifiers. - */ - err = chown_yod(path, - mask & SETATTR_UID - ? stbuf->st_uid - : (uid_t )-1, - mask & SETATTR_GID - ? stbuf->st_gid - : (gid_t )-1); - } - if (err) - mask &= ~(SETATTR_UID|SETATTR_GID); - else if (mask & SETATTR_LEN) { - /* - * Do the truncate last. It can't be undone. - */ - (void )(fd < 0 - ? truncate_yod(path, stbuf->st_size) - : ftruncate_yod(fd, stbuf->st_size)); - } - if (!err) - goto out; - /* - * Undo after error. Some or all of this might not work... We - * can but try. - */ - if (mask & (SETATTR_UID|SETATTR_GID)) { - (void )chown_yod(path, - mask & SETATTR_UID - ? st.st_uid - : (uid_t )-1, - mask & SETATTR_GID - ? 
st.st_gid - : (gid_t )-1); - } - if (mask & SETATTR_MODE) { - chmod_yod(path, st.st_mode & 07777); - } -out: - if (path) - free(path); - return err; -} - -static ssize_t -yod_filldirentries(struct inode *ino, - char *buf, - _SYSIO_OFF_T *posp, - size_t nbytes) -{ - struct yod_inode *nino = I2NI(ino); - _SYSIO_OFF_T result; - ssize_t cc; - - assert(nino->ni_fd >= 0); - - result = *basep; - if (*basep != nino->ni_fpos && - (result = lseek_yod(nino->ni_fd, - *posp, - SEEK_SET) == -1)) - return -errno; - nino->ni_fpos = result; - memset(buf, 0, nbytes); - /* - * This is almost certainly broken. The resulting position parameter - * points to the block just filled, not the next. - */ - cc = getdirentries_yod(nino->ni_fd, buf, nbytes, &result); - if (cc < 0) - return -errno; - nino->ni_fpos = *posp = result; - return cc; -} - -static int -yod_inop_mkdir(struct pnode *pno, mode_t mode) -{ - char *path; - int err; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - err = mkdir_yod(path, mode); - free(path); - return err; -} - -static int -yod_inop_rmdir(struct pnode *pno) -{ - char *path; - int err; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - err = rmdir_yod(path); - free(path); - return err; -} - -static int -yod_inop_symlink(struct pnode *pno, const char *data) -{ - char *path; - int err; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - err = symlink_yod(data, path); - free(path); - return err; -} - -static int -yod_inop_readlink(struct pnode *pno __IS_UNUSED, - char *buf __IS_UNUSED, - size_t bufsiz __IS_UNUSED) -{ - - return -ENOSYS; -} - -static int -yod_inop_open(struct pnode *pno, int flags, mode_t mode) -{ - struct yod_inode *nino; - char *path; - int fd; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - /* - * Whether the file is already open, or not, makes no difference. 
- * Want to always give the host OS a chance to authorize in case - * something has changed underneath us. - */ - if (flags & O_WRONLY) { - /* - * Promote write-only attempt to RW. - */ - flags &= ~O_WRONLY; - flags |= O_RDWR; - } - fd = open_yod(path, flags, mode); - if (!pno->p_base->pb_ino && fd >= 0) { - int err; - - /* - * Success but we need to return an i-node. - */ - err = - yod_iget(pno->p_mount->mnt_fs, - path, - &pno->p_base->pb_ino, - 1); - if (err) { - (void )close_yod(fd); - if (err == -EEXIST) - abort(); - fd = err; - } - } - free(path); - if (fd < 0) - return -errno; - - /* - * Remember this new open. - */ - nino = I2NI(pno->p_base->pb_ino); - nino->ni_nopens++; - assert(nino->ni_nopens); - - if (nino->ni_fd >= 0) { - if ((nino->ni_oflags & O_RDWR) || - (flags & (O_RDONLY|O_WRONLY|O_RDWR)) == O_RDONLY) { - /* - * Keep existing. - */ - (void )close_yod(fd); - return 0; - } - (void )close_yod(nino->ni_fd); - } - /* - * Invariant; First open. Must init. - */ - nino->ni_fpos = 0; - nino->ni_fd = fd; - - /* - * Need to know whether we can seek on this - * descriptor. - */ - nino->ni_seekok = - lseek_yod(nino->ni_fd, 0, SEEK_CUR) != 0 ? 
0 : 1; - - return 0; -} - -static int -yod_inop_close(struct inode *ino) -{ - struct yod_inode *nino = I2NI(ino); - int err; - - if (nino->ni_fd < 0) - abort(); - - assert(nino->ni_nopens); - if (--nino->ni_nopens) - return 0; - - err = close_yod(nino->ni_fd); - if (err) - return -errno; - - nino->ni_fd = -1; - nino->ni_fpos = 0; - return 0; -} - -static int -yod_inop_link(struct pnode *old, struct pnode *new) -{ - int err; - char *opath, *npath; - - err = 0; - - opath = _sysio_pb_path(old->p_base, '/'); - npath = _sysio_pb_path(new->p_base, '/'); - if (!(opath && npath)) { - err = -ENOMEM; - goto out; - } - - err = link_yod(opath, npath); - -out: - if (opath) - free(opath); - if (npath) - free(npath); - - return err; -} - -static int -yod_inop_unlink(struct pnode *pno) -{ - char *path; - int err = 0; - - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - - /* - * For this driver, unlink is easy with open files. Since the - * file remains open to the system, too, the descriptors are still - * valid. - * - * Other drivers will have some difficulty here as the entry in the - * file system name space must be removed without sacrificing access - * to the file itself. In NFS this is done with a mechanism referred - * to as a `silly delete'. The file is moved to a temporary name - * (usually .NFSXXXXXX, where the X's are replaced by the PID and some - * unique characters) in order to simulate the proper semantic. - */ - if (unlink_yod(path) != 0) - err = -errno; - free(path); - return err; -} - -/* - * A helper function performing the real IO operation work. - * - * We don't really have async IO. We'll just perform the function - * now. 
- */ -static int -doio(ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, struct yod_inode *), - struct ioctx *ioctx) -{ - struct yod_inode *nino = I2NI(ioctx->ioctx_ino); - - ioctx->ioctx_cc = - _sysio_doio(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen, - ioctx->ioctx_iov, ioctx->ioctx_iovlen, - (ssize_t (*)(void *, size_t, - _SYSIO_OFF_T, void *))f, - nino); - if (ioctx->ioctx_cc < 0) { - ioctx->ioctx_errno = -ioctx->ioctx_cc; - ioctx->ioctx_cc = -1; - return -1; - } - nino->ni_fpos += ioctx->ioctx_cc; - ioctx->ioctx_done = 1; - return 0; -} - -static ssize_t -yod_read_simple(void *buf, - size_t nbytes, - _SYSIO_OFF_T off, - struct yod_inode *nino) -{ - if (off != nino->ni_fpos) { - _SYSIO_OFF_T rtn; - - rtn = lseek_yod(nino->ni_fd, off, SEEK_SET); - if (rtn < 0) - return -1; - nino->ni_fpos = rtn; - } - return read_yod(nino->ni_fd, buf, nbytes); -} - -static int -yod_inop_read(struct inode *ino __IS_UNUSED, struct ioctx *ioctx) -{ - - return doio(yod_read_simple, ioctx); -} - -static int -yod_inop_rename(struct pnode *old, struct pnode *new) -{ - int err; - char *opath, *npath; - - opath = _sysio_pb_path(old->p_base, '/'); - npath = _sysio_pb_path(new->p_base, '/'); - if (!(opath && npath)) { - err = -ENOMEM; - goto out; - } - - err = rename_yod(opath, npath); - -out: - if (opath) - free(opath); - if (npath) - free(npath); - - return err; -} - -static ssize_t -yod_write_simple(void *buf, - size_t nbytes, - _SYSIO_OFF_T off, - struct yod_inode *nino) -{ - - if (off != nino->ni_fpos) { - _SYSIO_OFF_T rtn; - - rtn = lseek_yod(nino->ni_fd, off, SEEK_SET); - if (rtn < 0) - return -1; - nino->ni_fpos = rtn; - } - return write_yod(nino->ni_fd, buf, nbytes); -} - -static int -yod_inop_write(struct inode *ino __IS_UNUSED, struct ioctx *ioctx) -{ - - return doio(yod_write_simple, ioctx); -} - -static _SYSIO_OFF_T -yod_inop_pos(struct inode *ino, _SYSIO_OFF_T off) -{ - struct yod_inode *nino = I2NI(ino); - int err; - - err = lseek_yod(nino->ni_fd, off, SEEK_SET); - return err < 0 ? 
err : off; -} - -static int -yod_inop_iodone(struct ioctx *ioctxp __IS_UNUSED) -{ - - /* - * It's always done in this driver. It completed when posted. - */ - return 1; -} - -static int -yod_inop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn) -{ - struct yod_inode *nino = I2NI(ino); - long arg; - int err; - - if (nino->ni_fd < 0) - abort(); - - err = 0; - switch (cmd) { - case F_GETFD: - case F_GETFL: -#ifdef F_GETOWN - case F_GETOWN: -#endif - *rtn = syscall(SYS_fcntl, nino->ni_fd, cmd); - if (*rtn == -1) - err = -errno; - break; - case F_DUPFD: - case F_SETFD: - case F_SETFL: - case F_GETLK: - case F_SETLK: - case F_SETLKW: -#ifdef F_SETOWN - case F_SETOWN: -#endif - arg = va_arg(ap, long); - *rtn = syscall(SYS_fcntl, nino->ni_fd, cmd, arg); - if (*rtn == -1) - err = -errno; - break; - default: - *rtn = -1; - err = -EINVAL; - } - return err; -} - -static int -yod_inop_mknod(struct pnode *pno __IS_UNUSED, - mode_t mode __IS_UNUSED, - dev_t dev __IS_UNUSED) -{ - - return -ENOSYS; -} - -#ifdef _HAVE_STATVFS -static int -yod_inop_statvfs(struct pnode *pno, - struct inode *ino, - struct intnl_statvfs *buf) -{ - char *path; - int rc; - struct statfs fs; - - path = NULL; - if (!ino || I2NI(ino)->ni_fd < 0) { - path = _sysio_pb_path(pno->p_base, '/'); - if (!path) - return -ENOMEM; - } - - /* - * The syscall interface does not support SYS_fstatvfs. - * Should possibly return ENOSYS, but thought it - * better to use SYS_fstatfs and fill in as much of - * the statvfs structure as possible. This allows - * for more of a test of the sysio user interface. - */ - rc = - path - ? 
statfs_yod(path, &fs) - : fstatfs_yod(I2NI(ino)->ni_fd, &fs); - if (path) - free(path); - if (rc < 0) - return -errno; - - buf->f_bsize = fs.f_bsize; /* file system block size */ - buf->f_frsize = fs.f_bsize; /* file system fundamental block size */ - buf->f_blocks = fs.f_blocks; - buf->f_bfree = fs.f_bfree; - buf->f_bavail = fs.f_bavail; - buf->f_files = fs.f_files; /* Total number serial numbers */ - buf->f_ffree = fs.f_ffree; /* Number free serial numbers */ - buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/ - buf->f_fsid = fs.f_fsid.__val[1]; - buf->f_flag = 0; /* No equiv in statfs; maybe use type? */ - buf->f_namemax = fs.f_namelen; - return 0; -} -#endif - -static int -yod_inop_sync(struct inode *ino) -{ - - assert(I2NI(ino)->ni_fd >= 0); - - return fsync_yod(I2NI(ino)->ni_fd); -} - -static int -yod_inop_datasync(struct inode *ino) -{ - - assert(I2NI(ino)->ni_fd >= 0); - - return fsync_yod(I2NI(ino)->ni_fd); -} - -static int -yod_inop_ioctl(struct inode *ino __IS_UNUSED, - unsigned long int request __IS_UNUSED, - va_list ap __IS_UNUSED) -{ - - /* - * I'm lazy. Maybe implemented later. - */ - errno = ENOTTY; - return -1; -} - -static void -yod_inop_gone(struct inode *ino) -{ - struct yod_inode *nino = I2NI(ino); - - if (nino->ni_fd) - (void )close(nino->ni_fd); - free(ino->i_private); -} - -static void -yod_fsop_gone(struct filesys *fs __IS_UNUSED) -{ - - /* - * Do nothing. There is no private part maintained for the - * yod file interface. - */ -} diff --git a/libsysio/drivers/yod/fs_yod.h b/libsysio/drivers/yod/fs_yod.h deleted file mode 100644 index 174b82dd812d17fe0bff7a89cbeaedb77cc957d7..0000000000000000000000000000000000000000 --- a/libsysio/drivers/yod/fs_yod.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Remote file system driver support. 
- */ - -extern int _sysio_yod_init(); diff --git a/libsysio/drivers/yod/module.mk b/libsysio/drivers/yod/module.mk deleted file mode 100644 index 1c2cc910862f012a93c79c53eb5bc593ff22785c..0000000000000000000000000000000000000000 --- a/libsysio/drivers/yod/module.mk +++ /dev/null @@ -1,10 +0,0 @@ -if WITH_CPLANT_YOD -YOD_SRCS = drivers/yod/fs_yod.c -YOD_DRIVER_FLAGS = -DCPLANT_YOD -else -YOD_SRCS = -YOD_DRIVER_FLAGS = -endif - -# Bring yod files along in the distribution regardless -YOD_EXTRA = include/cplant-yod.h drivers/yod/fs_yod.h drivers/yod/module.mk diff --git a/libsysio/include/cplant-yod.h b/libsysio/include/cplant-yod.h deleted file mode 100644 index 8aa4b50fb543ee04ae655fd40379733b3269bd53..0000000000000000000000000000000000000000 --- a/libsysio/include/cplant-yod.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * cplant yod I/O functions - */ -extern int chmod_yod(const char* path, mode_t); -extern int chown_yod(const char* path, uid_t, gid_t); -extern int stat_yod(const char *path, struct stat *sbuf); -extern int fstat_yod(int fd, struct stat *buf); -#ifdef _HAVE_STATVFS -extern int statfs_yod(const char *path, struct statfs *sbuf); -extern int fstatfs_yod(int fd, struct statfs *buf); -#endif -extern int mkdir_yod(const char *path, mode_t mode); -extern int rmdir_yod(const char *path); -extern int getdirentries_yod(int fd, char *buf, size_t nbytes, loff_t *basep); -extern int link_yod(const char *path1, const char *path2); -extern int unlink_yod(const char *path); -extern int symlink_yod(const char *path1, const char *path2 ); -extern int rename_yod( const char *path1, const char *path2 ); -extern int open_yod(const char *fname, int flags, mode_t mode); -extern int close_yod(int); -extern ssize_t write_yod(int fd, const void *buff, size_t nbytes); -extern ssize_t read_yod(int fd, void *buff, size_t nbytes); -extern int fsync_yod(int fd); -extern int truncate_yod(const char *path, off_t length); -extern int ftruncate_yod(int fd, long length); -extern off_t lseek_yod(int fd, off_t offset, int whence); diff --git a/libsysio/include/creds.h b/libsysio/include/creds.h deleted file mode 100644 index 90305e1d305f294cb1cc56122d07f7427b97044a..0000000000000000000000000000000000000000 --- a/libsysio/include/creds.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * This Cplant(TM) source code is the 
property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> - -#ifndef _CREDS_H_ -#define _CREDS_H_ - -/* - * Data structure for user credentials - */ - -struct creds { - uid_t creds_uid; - gid_t *creds_gids; - int creds_ngids; -}; - -#endif diff --git a/libsysio/include/dev.h b/libsysio/include/dev.h deleted file mode 100644 index 2620d494163ce53db6668e340c7f93671d8e4175..0000000000000000000000000000000000000000 --- a/libsysio/include/dev.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Device support. - */ - -/* - * Make a device number, composed of major and minor parts. We *assume* that - * the system version of a dev_t is 16 bits or more. - */ -#define SYSIO_MKDEV(major, minor) \ - ((((major) & 0xff) << 8) | ((minor) & 0xff)) - -/* - * Return major unit given dev number. - */ -#define SYSIO_MAJOR_DEV(dev) \ - (((dev) >> 8) & 0xff) - -/* - * Return minor unit given dev number. - */ -#define SYSIO_MINOR_DEV(dev) \ - ((dev) & 0xff) - -extern const struct inode_ops _sysio_nodev_ops; - -#define _sysio_nodev_inop_lookup \ - (int (*)(struct pnode *, \ - struct inode **, \ - struct intent *, \ - const char *))_sysio_do_illop -#define _sysio_nodev_inop_getattr \ - (int (*)(struct pnode *, \ - struct inode *, \ - struct intnl_stat *))_sysio_do_ebadf -#define _sysio_nodev_inop_setattr \ - (int (*)(struct pnode *, \ - struct inode *, \ - unsigned , \ - struct intnl_stat *))_sysio_do_ebadf -#define _sysio_nodev_filldirentries \ - (ssize_t (*)(struct inode *, \ - _SYSIO_OFF_T *, \ - char *, \ - size_t))_sysio_do_illop -#define _sysio_nodev_inop_mkdir \ - (int (*)(struct pnode *, \ - mode_t))_sysio_do_illop -#define _sysio_nodev_inop_rmdir \ - (int (*)(struct pnode *))_sysio_do_illop -#define _sysio_nodev_inop_symlink \ - (int (*)(struct pnode *, \ - const char *))_sysio_do_illop -#define _sysio_nodev_inop_readlink \ - (int (*)(struct pnode *, \ - char *, \ - size_t))_sysio_do_illop -#define _sysio_nodev_inop_open \ - (int (*)(struct pnode *, \ - int, \ - mode_t))_sysio_do_enodev -#define _sysio_nodev_inop_close \ - (int 
(*)(struct inode *))_sysio_do_ebadf -#define _sysio_nodev_inop_link \ - (int (*)(struct pnode *, struct pnode *))_sysio_do_illop -#define _sysio_nodev_inop_unlink \ - (int (*)(struct pnode *))_sysio_do_illop -#define _sysio_nodev_inop_rename \ - (int (*)(struct pnode *, struct pnode *))_sysio_do_illop -#define _sysio_nodev_inop_read \ - (int (*)(struct inode *, \ - struct ioctx *))_sysio_do_ebadf -#define _sysio_nodev_inop_write \ - (int (*)(struct inode *, \ - struct ioctx *))_sysio_do_ebadf -#define _sysio_nodev_inop_pos \ - (_SYSIO_OFF_T (*)(struct inode *, _SYSIO_OFF_T))_sysio_do_ebadf -#define _sysio_nodev_inop_iodone \ - (int (*)(struct ioctx *))_sysio_do_einval -#define _sysio_nodev_inop_fcntl \ - (int (*)(struct inode *, \ - int, \ - va_list, \ - int *))_sysio_do_ebadf -#define _sysio_nodev_inop_sync \ - (int (*)(struct inode *))_sysio_do_ebadf -#define _sysio_nodev_inop_datasync \ - (int (*)(struct inode *))_sysio_do_ebadf -#define _sysio_nodev_inop_ioctl \ - (int (*)(struct inode *, \ - unsigned long int, \ - va_list))_sysio_do_ebadf -#define _sysio_nodev_inop_mknod \ - (int (*)(struct pnode *, \ - mode_t, \ - dev_t))_sysio_do_illop -#ifdef _HAVE_STATVFS -#define _sysio_nodev_inop_statvfs \ - (int (*)(struct pnode *, \ - struct inode *, \ - struct intnl_statvfs *))_sysio_do_illop -#endif -#define _sysio_nodev_inop_gone \ - (void (*)(struct inode *ino))_sysio_do_noop - -extern int _sysio_dev_init(void); -extern dev_t _sysio_dev_alloc(void); -extern struct inode_ops *_sysio_dev_lookup(mode_t mode, dev_t dev); -extern int _sysio_char_dev_register(int major, - const char *name, - struct inode_ops *ops); diff --git a/libsysio/include/file.h b/libsysio/include/file.h deleted file mode 100644 index d30e84e86d8d0f91d3355d7878ad47ae0841cc9d..0000000000000000000000000000000000000000 --- a/libsysio/include/file.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. 
- * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Open file support. - */ - -/* - * Test whether large file support on this file. - */ -#ifdef O_LARGEFILE -#define _F_LARGEFILE(fil) \ - ((fil)->f_flags & O_LARGEFILE) -#else -#define _F_LARGEFILE(fil) \ - (1) -#endif -/* - * Return max seek value for this file. - */ -#define _SEEK_MAX(fil) \ - (_F_LARGEFILE(fil) ? 
_SYSIO_OFF_T_MAX : LONG_MAX) - -#ifdef _LARGEFILE64_SOURCE -#define _SYSIO_FLOCK flock64 -#else -#define _SYSIO_FLOCK flock -#endif - -/* - * A file record is maintained for each open file in the system. It holds - * all the info necessary to track the context and parameters for the - * operations that may be performed. - */ -struct file { - struct inode *f_ino; /* path node */ - _SYSIO_OFF_T f_pos; /* current stream pos */ - unsigned f_ref; /* ref count */ - int f_flags; /* open/fcntl flags */ -}; - -/* - * Reference a file record. - */ -#define F_REF(fil) \ - do { \ - (fil)->f_ref++; \ - assert((fil)->f_ref); \ - } while (0) - -/* - * Release reference to a file record. - */ -#define F_RELE(fil) \ - do { \ - assert((fil)->f_ref); \ - (fil)->f_ref--; \ - if (!(fil)->f_ref) \ - _sysio_fgone(fil); \ - } while (0) - -/* - * Init file record. - * - * NB: Don't forget to take a reference to the inode too! - */ -#define _SYSIO_FINIT(fil, ino, flags) \ - do { \ - (fil)->f_ino = (ino); \ - (fil)->f_pos = 0; \ - (fil)->f_ref = 0; \ - (fil)->f_flags = (flags); \ - } while (0) - -/* - * Determine if a file may be read/written. - * - * Given a ptr to an open file table entry and a flag indicating desired - * access return non-zero if the file record indicates that the access is - * permitted or zero, if not. 
- * - * 'r' for read access check - * 'w' for write access check - */ - -#define F_CHKRW(_fil, _c) \ - (((_c) == 'r' && !((_fil)->f_flags & O_WRONLY)) || \ - ((_c) == 'w' && ((_fil)->f_flags & (O_WRONLY | O_RDWR)))) - -struct ioctx; - -extern struct file *_sysio_fnew(struct inode *ino, int flags); -extern void _sysio_fgone(struct file *fil); -extern void _sysio_fcompletio(struct ioctx *ioctx, struct file *fil); -extern int _sysio_fd_close(int fd); -extern struct file *_sysio_fd_find(int fd); -extern int _sysio_fd_set(struct file *fil, int fd, int force); -extern int _sysio_fd_dup(int oldfd, int newfd, int force); -extern int _sysio_fd_close_all(void); -#ifdef ZERO_SUM_MEMORY -extern void _sysio_fd_shutdown(void); -#endif -extern _SYSIO_OFF_T _sysio_lseek_prepare(struct file *fil, - _SYSIO_OFF_T offset, - int whence, - _SYSIO_OFF_T max); diff --git a/libsysio/include/fs.h b/libsysio/include/fs.h deleted file mode 100644 index 95eab247ed65036da6b1dfd4501f238f8b2bb82b..0000000000000000000000000000000000000000 --- a/libsysio/include/fs.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * File system or volume support. - */ - -struct filesys; - -struct pnode; -struct mount; - -/* - * File system switch operations. - */ -struct fssw_ops { - int (*fsswop_mount)(const char *source, - unsigned flags, - const void *data, - struct pnode *tocover, - struct mount **mntp); -}; - -/* - * File system switch entry record. - * - * Each available file system or volume access driver is represented by - * one of these switch entries in the switch. - */ -struct fsswent { - const char *fssw_name; /* entry name */ - LIST_ENTRY(fsswent) fssw_link; /* link to next */ - struct fssw_ops fssw_ops; /* operations */ -}; - -/* - * Init file system switch entry record. - */ -#define FSSWENT_INIT(fsswent, name, ops) \ - do { \ - (fsswent)->fssw_name = (name); \ - (fsswent)->fssw_ops = (ops); \ - } while (0) - -struct inode; - -/* - * File system operations. - */ -struct filesys_ops { - void (*fsop_gone)(struct filesys *); -}; - -/* - * Define the desired size of the file system record's inode table. 
This should - * probably be something fancy that tries to use up a system page, or the - * like. I'm not feeling adventurous right now though. It is prime though. - * That should help out the hash. - */ -#ifndef FS_ITBLSIZ -#define FS_ITBLSIZ 503 -#endif - -/* - * Inode list head record. - */ -LIST_HEAD(itable_entry, inode); - -/* - * A filesys record is maintained for each active file system or volume. - */ -struct filesys { - dev_t fs_dev; /* device ID */ - unsigned fs_ref; /* soft ref count */ - unsigned fs_flags; /* flags (see below) */ - struct filesys_ops fs_ops; /* operations */ - void *fs_private; /* driver data */ - struct itable_entry fs_itbl[FS_ITBLSIZ]; /* inodes hash */ - unsigned long fs_id; /* ID */ - size_t fs_bsize; /* block size */ -}; - -#define FS_F_RO 0x01 /* read-only */ - -/* - * Init file system record. - */ -#define FS_INIT(fs, flags, ops, private) \ - do { \ - size_t __i; \ - struct itable_entry *__head; \ - \ - (fs)->fs_ref = 1; \ - (fs)->fs_flags = (flags); \ - (fs)->fs_ops = *(ops); \ - (fs)->fs_private = (private); \ - __i = FS_ITBLSIZ; \ - __head = (fs)->fs_itbl; \ - do { \ - LIST_INIT(__head); \ - __head++; \ - } while (--__i); \ - } while (0) - -/* - * Reference file system record. - */ -#define FS_REF(fs) \ - do { \ - ++(fs)->fs_ref; \ - assert((fs)->fs_ref); \ - } while (0) - -/* - * Release reference to file system record. 
- */ -#define FS_RELE(fs) \ - do { \ - assert((fs)->fs_ref); \ - if (!--(fs)->fs_ref) \ - _sysio_fs_gone(fs); \ - } while (0) - -extern struct fsswent *_sysio_fssw_lookup(const char *name); -extern int _sysio_fssw_register(const char *name, struct fssw_ops *ops); -extern struct filesys * _sysio_fs_new(struct filesys_ops *ops, - unsigned mask, - void *private); -extern void _sysio_fs_gone(struct filesys *fs); -#ifdef ZERO_SUM_MEMORY -extern void _sysio_fssw_shutdown(void); -#endif diff --git a/libsysio/include/inode.h b/libsysio/include/inode.h deleted file mode 100644 index baedc507af50ef2453ae861fb3694d766bcf9337..0000000000000000000000000000000000000000 --- a/libsysio/include/inode.h +++ /dev/null @@ -1,486 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#if defined(AUTOMOUNT_FILE_NAME) && !defined(MAX_MOUNT_DEPTH) -/* - * Maximum number of automounts to attempt in path traversal. - */ -#define MAX_MOUNT_DEPTH 64 -#endif - -/* - * Each i-node is uniquely identified by a file identifier, supplied by - * the relevant file system driver. The i-node number returned in the getattrs - * call is not always enough. - */ -struct file_identifier { - void *fid_data; - size_t fid_len; -}; - -struct pnode; -struct inode; -struct intent; -struct intnl_dirent; -struct intnl_stat; -#ifdef _HAVE_STATVFS -struct intnl_statvfs; -#endif -struct io_arguments; -struct ioctx; - -/* - * Operations on i-nodes. - * - * Should this be split up into file and name space operations? 
- */ -struct inode_ops { - int (*inop_lookup)(struct pnode *pno, - struct inode **inop, - struct intent *intnt, - const char *path); - int (*inop_getattr)(struct pnode *pno, - struct inode *ino, - struct intnl_stat *stbuf); - int (*inop_setattr)(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf); - ssize_t (*inop_filldirentries)(struct inode *ino, - _SYSIO_OFF_T *posp, - char *buf, - size_t nbytes); - int (*inop_mkdir)(struct pnode *pno, mode_t mode); - int (*inop_rmdir)(struct pnode *pno); - int (*inop_symlink)(struct pnode *pno, const char *data); - int (*inop_readlink)(struct pnode *pno, char *buf, size_t bufsiz); - int (*inop_open)(struct pnode *pno, int flags, mode_t mode); - int (*inop_close)(struct inode *ino); - int (*inop_link)(struct pnode *old, struct pnode *new); - int (*inop_unlink)(struct pnode *pno); - int (*inop_rename)(struct pnode *old, struct pnode *new); - int (*inop_read)(struct inode *ino, struct ioctx *ioctx); - int (*inop_write)(struct inode *ino, struct ioctx *ioctx); - _SYSIO_OFF_T (*inop_pos)(struct inode *ino, _SYSIO_OFF_T off); - int (*inop_iodone)(struct ioctx *iocp); - int (*inop_fcntl)(struct inode *ino, int cmd, va_list ap, int *rtn); - int (*inop_sync)(struct inode *ino); - int (*inop_datasync)(struct inode *ino); - int (*inop_ioctl)(struct inode *ino, unsigned long int request, va_list ap); - int (*inop_mknod)(struct pnode *pno, mode_t mode, dev_t dev); -#ifdef _HAVE_STATVFS - int (*inop_statvfs)(struct pnode *pno, - struct inode *ino, - struct intnl_statvfs *buf); -#endif - void (*inop_gone)(struct inode *ino); -}; - -/* - * Values for the mask to inop_setattr. - */ -#define SETATTR_MODE 0x01 -#define SETATTR_MTIME 0x02 -#define SETATTR_ATIME 0x04 -#define SETATTR_UID 0x08 -#define SETATTR_GID 0x10 -#define SETATTR_LEN 0x20 - -/* - * An i-node record is maintained for each file object in the system. 
- */ -struct inode { - LIST_ENTRY(inode) i_link; /* FS i-nodes link */ - unsigned - i_immune : 1, /* immune from GC */ - i_zombie : 1; /* stale inode */ - unsigned i_ref; /* soft ref counter */ - struct inode_ops i_ops; /* operations */ - struct intnl_stat i_stbuf; /* attrs */ - struct filesys *i_fs; /* file system ptr */ - struct file_identifier *i_fid; /* file ident */ - void *i_private; /* driver data */ - TAILQ_ENTRY(inode) i_nodes; /* all i-nodes link */ -}; - -/* - * Init an i-node record. - */ -#define I_INIT(ino, fs, stat, ops, fid, immunity, private) \ - do { \ - (ino)->i_immune = (immunity) ? 1 : 0; \ - (ino)->i_zombie = 0; \ - (ino)->i_ref = 0; \ - (ino)->i_ops = *(ops); \ - (ino)->i_stbuf = *(stat); \ - (ino)->i_fs = (fs); \ - (ino)->i_fid = (fid); \ - (ino)->i_private = (private); \ - } while (0) - -/* - * Take soft reference to i-node. - */ -#define I_REF(ino) \ - do { \ - TAILQ_REMOVE(&_sysio_inodes, (ino), i_nodes); \ - TAILQ_INSERT_TAIL(&_sysio_inodes, (ino), i_nodes); \ - (ino)->i_ref++; \ - assert((ino)->i_ref); \ - } while (0) - -/* - * Release soft reference to i-node. - */ -#define I_RELE(ino) \ - do { \ - assert((ino)->i_ref); \ - if (!--(ino)->i_ref && (ino)->i_zombie) \ - _sysio_i_gone(ino); \ - } while (0) - -/* - * Attempt to kill an inode. - */ -#define I_GONE(ino) \ - do { \ - _sysio_i_undead(ino); \ - I_RELE(ino); \ - } while (0) - -/* - * The "quick string" record (inspired by the structure of the same name - * from Linux) is used to pass a string without delimiters as well as useful - * information about the string. - */ -struct qstr { - const char *name; - size_t len; - unsigned hashval; -}; - -/* - * A path node is an entry in a directory. It may have many aliases, one - * for each name space in which it occurs. This record holds the - * common information. 
- */ -struct pnode_base { - struct qstr pb_name; /* entry name */ - struct inode *pb_ino; /* inode */ - LIST_HEAD(, pnode_base) pb_children; /* children if a dir */ - LIST_ENTRY(pnode_base) pb_sibs; /* links to siblings */ - LIST_ENTRY(pnode_base) pb_names; /* near names links */ - LIST_HEAD(, pnode) pb_aliases; /* aliases */ - struct pnode_base *pb_parent; /* parent */ -}; - -/* - * Since a file system may be multiply mounted, in different parts of the local - * tree, a file system object may appear in different places. We handle that - * with aliases. There is one pnode for every alias the system is tracking. - * - * Name space traversal depends heavily on the interpretation of many - * of the fields in this structure. For that reason a detailed discussion - * of the various fields is given. - * - * The reference field records soft references to the record. For instance, - * it tracks file and directory opens. It does not track sibling references, - * though, as those are hard references and can be found by examining the - * aliases list in the base part of the node. - * - * The parent value points to the parent directory for this entry, in the - * *system* name space -- Not the mounted volumes. If you want to examine - * the moutned volume name space, use the base record. - * - * The base value points to the base path node information. It is info common - * to all of the aliases. - * - * The mount value points to the mount record for the rooted name space in - * which the alias is found. Notably, if a node is the root of a sub-tree then - * the mount record, among other things, indicates another node - * (in another sub-tree) that is covered by this one. - * - * Another sub-tree, mounted on this node, is indicated by a non-null cover. - * The pnode pointed to, then, is the root of the mounted sub-tree. - * - * The links list entry holds pointers to other aliases for the base path - * node entry. - * - * The nodes link is bookkeeping. 
- */ -struct pnode { - unsigned p_ref; /* soft ref count */ - struct pnode *p_parent; /* parent */ - struct pnode_base *p_base; /* base part */ - struct mount *p_mount; /* mount info */ - struct pnode *p_cover; /* covering pnode */ - LIST_ENTRY(pnode) p_links; /* other aliases */ - TAILQ_ENTRY(pnode) p_nodes; /* all nodes links */ -}; - -/* - * Reference path-tree node. - */ -#define P_REF(pno) \ - do { \ - TAILQ_REMOVE(&_sysio_pnodes, (pno), p_nodes); \ - TAILQ_INSERT_TAIL(&_sysio_pnodes, (pno), p_nodes); \ - (pno)->p_ref++; \ - assert((pno)->p_ref); \ - } while (0) - -/* - * Release reference to path-tree node. - */ -#define P_RELE(pno) \ - do { \ - assert((pno)->p_ref); \ - --(pno)->p_ref; \ - } while (0) - -/* - * An intent record allows callers of namei and lookup to pass some information - * about what they want to accomplish in the end. - */ -struct intent { - unsigned int_opmask; - void *int_arg1; - void *int_arg2; -}; - -/* - * Intent operations. - */ -#define INT_GETATTR 0x01 /* get attrs */ -#define INT_SETATTR 0x02 /* set attrs */ -#define INT_UPDPARENT 0x04 /* insert/delete */ -#define INT_OPEN 0x08 /* open */ -#define INT_CREAT (INT_UPDPARENT|0x10) /* insert */ -#define INT_READLINK 0x12 /* readlink */ - -#define INTENT_INIT(intnt, mask, arg1, arg2) \ - do { \ - (intnt)->int_opmask = (mask); \ - (intnt)->int_arg1 = (arg1); \ - (intnt)->int_arg2 = (arg2); \ - } while (0) - -/* - * Bundled up arguments to _sysio_path_walk. - */ -struct nameidata { - unsigned nd_flags; /* flags (see below) */ - const char *nd_path; /* path arg */ - struct pnode *nd_pno; /* returned pnode */ - struct pnode *nd_root; /* system/user root */ - struct intent *nd_intent; /* intent (NULL ok) */ - unsigned nd_slicnt; /* symlink indirects */ -#ifdef AUTOMOUNT_FILE_NAME - unsigned nd_amcnt; /* automounts */ -#endif -}; - -/* - * Values for nameidata flags field. 
- */ -#define ND_NOFOLLOW 0x01 /* no follow symlinks */ -#define ND_NEGOK 0x02 /* last missing is ok */ -#define ND_NOPERMCHECK 0x04 /* don't check perms */ - -#ifdef AUTOMOUNT_FILE_NAME -#define _ND_INIT_AUTOMOUNT(nd) ((nd)->nd_amcnt = 0) -#else -#define _ND_INIT_AUTOMOUNT(nd) -#endif - -#define _ND_INIT_OTHERS(nd) \ - _ND_INIT_AUTOMOUNT(nd) - -/* - * Init nameidata record. - */ -#define ND_INIT(nd, flags, path, root, intnt) \ - do { \ - (nd)->nd_flags = (flags); \ - (nd)->nd_path = (path); \ - (nd)->nd_pno = NULL; \ - (nd)->nd_root = (root); \ - (nd)->nd_intent = (intnt); \ - (nd)->nd_slicnt = 0; \ - _ND_INIT_OTHERS(nd); \ - } while (0) - -/* - * IO completion callback record. - */ -struct ioctx_callback { - TAILQ_ENTRY(ioctx_callback) iocb_next; /* list link */ - void (*iocb_f)(struct ioctx *, void *); /* cb func */ - void *iocb_data; /* cb data */ -}; - -/* - * All IO internally is done with an asynchronous mechanism. This record - * holds the completion information. It's too big :-( - */ -struct ioctx { - LIST_ENTRY(ioctx) ioctx_link; /* AIO list link */ - unsigned - ioctx_fast : 1, /* from stack space */ - ioctx_done : 1, /* transfer complete */ - ioctx_write : 1; /* op is a write */ - struct inode *ioctx_ino; /* i-node */ - const struct iovec *ioctx_iov; /* scatter/gather vec */ - size_t ioctx_iovlen; /* iovec length */ - const struct intnl_xtvec *ioctx_xtv; /* extents */ - size_t ioctx_xtvlen; /* xtv length */ - ssize_t ioctx_cc; /* rtn char count */ - int ioctx_errno; /* error number */ - TAILQ_HEAD(, ioctx_callback) ioctx_cbq; /* callback queue */ - void *ioctx_private; /* driver data */ -}; - -/* - * Init IO context record. - */ -#define IOCTX_INIT(ioctx, fast, wr, ino, iov, iovlen, xtv, xtvlen) \ - do { \ - (ioctx)->ioctx_fast = (fast); \ - (ioctx)->ioctx_done = 0; \ - (ioctx)->ioctx_write = (wr) ? 
1 : 0; \ - (ioctx)->ioctx_ino = (ino); \ - (ioctx)->ioctx_iov = (iov); \ - (ioctx)->ioctx_iovlen = (iovlen); \ - (ioctx)->ioctx_xtv = (xtv); \ - (ioctx)->ioctx_xtvlen = (xtvlen); \ - (ioctx)->ioctx_cc = 0; \ - (ioctx)->ioctx_errno = 0; \ - TAILQ_INIT(&(ioctx)->ioctx_cbq); \ - (ioctx)->ioctx_private = NULL; \ - } while (0) - -/* - * Return whether access to a pnode is read-only. - */ -#define IS_RDONLY(pno) \ - ((pno)->p_mount->mnt_flags & MOUNT_F_RO) - -extern struct pnode *_sysio_root; - -extern TAILQ_HEAD(inodes_head, inode) _sysio_inodes; -extern TAILQ_HEAD(pnodes_head, pnode) _sysio_pnodes; - -extern int _sysio_i_init(void); -#ifdef ZERO_SUM_MEMORY -extern void _sysio_i_shutdown(void); -#endif -extern struct inode *_sysio_i_new(struct filesys *fs, - struct file_identifier *fid, - struct intnl_stat *stat, - unsigned immunity, - struct inode_ops *ops, - void *private); -extern struct inode *_sysio_i_find(struct filesys *fs, - struct file_identifier *fid); -extern void _sysio_i_gone(struct inode *ino); -extern void _sysio_i_undead(struct inode *ino); -extern int _sysio_p_find_alias(struct pnode *parent, - struct qstr *name, - struct pnode **pnop); -extern int _sysio_p_validate(struct pnode *pno, - struct intent *intnt, - const char *path); -extern struct pnode_base *_sysio_pb_new(struct qstr *name, - struct pnode_base *parent, - struct inode *ino); -extern void _sysio_pb_gone(struct pnode_base *pb); -extern struct pnode *_sysio_p_new_alias(struct pnode *parent, - struct pnode_base *pb, - struct mount *mnt); -extern void _sysio_p_gone(struct pnode *pno); -extern size_t _sysio_p_prune(struct pnode *root); -extern int _sysio_p_kill_all(struct pnode *root); -extern char *_sysio_pb_path(struct pnode_base *pb, char separator); -extern int _sysio_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf); -extern void _sysio_do_noop(void); -extern void _sysio_do_illop(void); -extern int _sysio_do_ebadf(void); -extern int 
_sysio_do_einval(void); -extern int _sysio_do_enoent(void); -extern int _sysio_do_enodev(void); -extern int _sysio_do_espipe(void); -extern int _sysio_do_eisdir(void); -extern int _sysio_do_enosys(void); -extern int _sysio_path_walk(struct pnode *parent, struct nameidata *nd); -#ifdef AUTOMOUNT_FILE_NAME -extern void _sysio_next_component(const char *path, struct qstr *name); -#endif -extern int _sysio_permitted(struct pnode *pno, int amode); -extern int _sysio_namei(struct pnode *pno, - const char *path, - unsigned flags, - struct intent *intnt, - struct pnode **pnop); -extern int _sysio_p_chdir(struct pnode *pno); -extern int _sysio_ioctx_init(void); -extern void _sysio_ioctx_enter(struct ioctx *ioctx); -extern struct ioctx *_sysio_ioctx_new(struct inode *ino, - int wr, - const struct iovec *iov, - size_t iovlen, - const struct intnl_xtvec *xtv, - size_t xtvlen); -extern int _sysio_ioctx_cb(struct ioctx *ioctx, - void (*f)(struct ioctx *, void *), - void *data); -extern void _sysio_ioctx_cb_free(struct ioctx_callback *cb); -extern struct ioctx *_sysio_ioctx_find(void *id); -extern int _sysio_ioctx_done(struct ioctx *ioctx); -extern ssize_t _sysio_ioctx_wait(struct ioctx *ioctx); -extern void _sysio_ioctx_complete(struct ioctx *ioctx); -extern int _sysio_open(struct pnode *pno, int flags, mode_t mode); -extern int _sysio_mkdir(struct pnode *where, mode_t mode); -extern int _sysio_mknod(struct pnode *where, mode_t mode, dev_t dev); diff --git a/libsysio/include/module.mk b/libsysio/include/module.mk deleted file mode 100644 index dc6bfd498d61ee135603ab5dad49a974acb09a63..0000000000000000000000000000000000000000 --- a/libsysio/include/module.mk +++ /dev/null @@ -1,5 +0,0 @@ -INCLUDE_EXTRA = include/dev.h include/file.h include/fs.h \ - include/inode.h include/mount.h include/sysio.h include/sysio-cmn.h \ - include/sysio-symbols.h include/cplant-yod.h \ - include/module.mk include/xtio.h include/stddir.h \ - include/native.h include/creds.h diff --git 
a/libsysio/include/mount.h b/libsysio/include/mount.h deleted file mode 100644 index 24f631d297833606da39f79325e9aeb26d38a1f4..0000000000000000000000000000000000000000 --- a/libsysio/include/mount.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Mount support. 
- */ - -struct filesys; -struct pnode; - -/* - * Each file system may be mounted multiple times and in various places - * in the name space. The mount record maintains the binding information - * between the system name space and the file system's. - */ -struct mount { - struct filesys *mnt_fs; /* file system */ - unsigned mnt_flags; /* flags (see below) */ - struct pnode *mnt_root; /* fs sub-tree root */ - struct pnode *mnt_covers; /* covered pnode */ - LIST_ENTRY(mount) mnt_link; /* link to next */ -}; - -/* - * Mount flags definitions. - */ -#define MOUNT_F_RO 0x01 /* read-only */ -#ifdef AUTOMOUNT_FILE_NAME -#define MOUNT_F_AUTO 0x02 /* automount enabled */ -#endif - -#ifdef AUTOMOUNT_FILE_NAME -extern struct qstr _sysio_mount_file_name; -#endif - -struct pnode_base; - -extern int _sysio_mount_init(void); -extern int _sysio_do_mount(struct filesys *fs, - struct pnode_base *rootpb, - unsigned flags, - struct pnode *tocover, - struct mount **mntp); -extern int _sysio_do_unmount(struct mount *fs); -extern int _sysio_mount_root(const char *source, - const char *type, - unsigned flags, - const void *data); -extern int _sysio_mount(struct pnode *cwd, - const char *source, - const char *target, - const char *filesystemtype, - unsigned long mountflags, - const void *data); -extern int _sysio_unmount_all(void); -#ifdef AUTOMOUNT_FILE_NAME -extern int _sysio_automount(struct pnode *mntpno); -#endif diff --git a/libsysio/include/native.h b/libsysio/include/native.h deleted file mode 100644 index d0c024bccb7de953875193586b96bcc5ea097c3a..0000000000000000000000000000000000000000 --- a/libsysio/include/native.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Native file system support. 
- */ - -#if ALPHA_LINUX - -/* - * stat struct from asm/stat.h, as returned - * by alpha linux kernel - */ -struct _sysio_native_stat { - unsigned int st_dev; - unsigned int st_ino; - unsigned int st_mode; - unsigned int st_nlink; - unsigned int st_uid; - unsigned int st_gid; - unsigned int st_rdev; - long st_size; - unsigned long st_atime; - unsigned long st_mtime; - unsigned long st_ctime; - unsigned int st_blksize; - int st_blocks; - unsigned int st_flags; - unsigned int st_gen; -}; - -#define SYSIO_COPY_STAT(src, dest) \ -do { \ - memset((dest), 0, sizeof((*dest))); \ - (dest)->st_dev = (src)->st_dev; \ - (dest)->st_ino = (src)->st_ino; \ - (dest)->st_mode = (src)->st_mode; \ - (dest)->st_nlink = (src)->st_nlink; \ - (dest)->st_uid = (src)->st_uid; \ - (dest)->st_gid = (src)->st_gid; \ - (dest)->st_rdev = (src)->st_rdev; \ - (dest)->st_size = (src)->st_size; \ - (dest)->st_atime = (src)->st_atime; \ - (dest)->st_mtime = (src)->st_mtime; \ - (dest)->st_ctime = (src)->st_ctime; \ - (dest)->st_blksize = (src)->st_blksize; \ - (dest)->st_blocks = (src)->st_blocks; \ - (dest)->st_flags = (src)->st_flags; \ - (dest)->st_gen = (src)->st_gen; \ -} while (0); - -#else -#define _sysio_native_stat intnl_stat -#define SYSIO_COPY_STAT(src, dest) *(dest) = *(src) -#endif - -/* - * System calls. 
- */ -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_lstat64) -#define SYSIO_SYS_stat SYS_lstat64 -#elif defined(SYS_lstat) -#define SYSIO_SYS_stat SYS_lstat -#endif -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_fstat64) -#define SYSIO_SYS_fstat SYS_fstat64 -#elif defined(SYS_fstat) -#define SYSIO_SYS_fstat SYS_fstat -#endif -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_truncate64) -#define SYSIO_SYS_truncate SYS_truncate64 -#elif defined(SYS_truncate) -#define SYSIO_SYS_truncate SYS_truncate -#endif -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_ftruncate64) -#define SYSIO_SYS_ftruncate SYS_ftruncate64 -#elif defined(SYS_ftruncate) -#define SYSIO_SYS_ftruncate SYS_ftruncate -#endif -#if defined(SYS_open) -#define SYSIO_SYS_open SYS_open -#endif -#if defined(SYS_close) -#define SYSIO_SYS_close SYS_close -#endif -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_lseek64) -#define SYSIO_SYS_lseek SYS_lseek64 -#elif defined(SYS_lseek) -#define SYSIO_SYS_lseek SYS_lseek -#endif -#if defined(SYS__llseek) -# if defined (__mips64__) -# define SYSIO_SYS__llseek SYS_O32__llseek -# else -# define SYSIO_SYS__llseek SYS__llseek -# endif -#endif -#if defined(SYS_read) -#define SYSIO_SYS_read SYS_read -#endif -#if defined(SYS_write) -#define SYSIO_SYS_write SYS_write -#endif -#if defined(SYS_readv) -#define SYSIO_SYS_readv SYS_readv -#endif -#if defined(SYS_writev) -#define SYSIO_SYS_writev SYS_writev -#endif -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_pread64) -#define SYSIO_SYS_pread SYS_pread64 -#elif defined(SYS_pread) -#define SYSIO_SYS_pread SYS_pread -#endif -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_pwrite64) -#define SYSIO_SYS_pwrite SYS_pwrite64 -#elif defined(SYS_pwrite) -#define SYSIO_SYS_pwrite SYS_pwrite -#endif -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_fcntl64) -#define SYSIO_SYS_fcntl SYS_fcntl64 -#elif defined(SYS_fcntl) -#define SYSIO_SYS_fcntl SYS_fcntl -#endif -#if defined(SYS_fsync) -#define SYSIO_SYS_fsync SYS_fsync -#endif -#if 
defined(ALPHA_LINUX) && defined(SYS_osf_fdatasync) -#define SYSIO_SYS_fdatasync SYS_osf_fdatasync -#elif defined(SYS_fdatasync) -#define SYSIO_SYS_fdatasync SYS_fdatasync -#endif -#if defined(SYS_chmod) -#define SYSIO_SYS_chmod SYS_chmod -#endif -#if defined(SYS_fchmod) -#define SYSIO_SYS_fchmod SYS_fchmod -#endif -#if defined(SYS_chown) -#define SYSIO_SYS_chown SYS_chown -#endif -#if defined(SYS_fchown) -#define SYSIO_SYS_fchown SYS_fchown -#endif -#if defined(SYS_umask) -#define SYSIO_SYS_umask SYS_umask -#endif -#if defined(SYS_mkdir) -#define SYSIO_SYS_mkdir SYS_mkdir -#endif -#if defined(SYS_rmdir) -#define SYSIO_SYS_rmdir SYS_rmdir -#endif -#if defined(SYS_getdirentries) -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_getdirentries64) -#define SYSIO_SYS_getdirentries SYS_getdirentries64 -#elif defined(SYS_getdirentries) -#define SYSIO_SYS_getdirentries SYS_getdirentries -#endif -#endif -#if defined(_LARGEFILE64_SOURCE) && defined(SYS_getdents64) -# define SYSIO_SYS_getdents64 SYS_getdents64 -#elif defined(SYS_getdents) -# if defined (__mips64__) -# define SYSIO_SYS_getdents64 SYS_getdents -# else -# define SYSIO_SYS_getdents SYS_getdents -# endif -#endif -#if defined(SYS_link) -#define SYSIO_SYS_link SYS_link -#endif -#if defined(SYS_unlink) -#define SYSIO_SYS_unlink SYS_unlink -#endif -#if defined(SYS_symlink) -#define SYSIO_SYS_symlink SYS_symlink -#endif -#if defined(SYS_rename) -#define SYSIO_SYS_rename SYS_rename -#endif -#if defined(SYS_readlink) -#define SYSIO_SYS_readlink SYS_readlink -#endif -#if defined(SYS_utimes) -#define SYSIO_SYS_utimes SYS_utimes -#endif -#if defined(SYS_utime) -#define SYSIO_SYS_utime SYS_utime -#endif -#if defined(SYS_socketcall) -# if defined (__mips64__) -# define SYSIO_SYS_socketcall SYS_O32_socketcall -# else -# define SYSIO_SYS_socketcall SYS_socketcall -# endif -#endif -#if defined(SYS_socket) -#define SYSIO_SYS_socket SYS_socket -#endif -#if defined(SYS_accept) -#define SYSIO_SYS_accept SYS_accept -#endif -#if 
defined(SYS_bind) -#define SYSIO_SYS_bind SYS_bind -#endif -#if defined(SYS_listen) -#define SYSIO_SYS_listen SYS_listen -#endif -#if defined(SYS_connect) -#define SYSIO_SYS_connect SYS_connect -#endif -#if defined(SYS_ioctl) -#define SYSIO_SYS_ioctl SYS_ioctl -#endif diff --git a/libsysio/include/stddir.h b/libsysio/include/stddir.h deleted file mode 100644 index ff830c7831d861e5325ed0beec83c41775030042..0000000000000000000000000000000000000000 --- a/libsysio/include/stddir.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Support for directory functions - */ - -#ifndef _STDDIR_H_ -#define _STDDIR_H_ - -#undef BUFSIZE -#define BUFSIZE 4096 - -struct __dirstream { - int fd; - _SYSIO_OFF_T base; /* start pos for next system call */ - _SYSIO_OFF_T filepos; /* current pos in dir file stream */ - size_t cur; /* current byte pos in data buffer */ - size_t effective; /* effective data size in buffer */ - char buf[BUFSIZE]; -}; - -#ifndef MAX -#define MAX(a,b) (a) > (b) ? (a) : (b) -#endif - -#endif /* ! _STDDIR_H_ */ diff --git a/libsysio/include/sysio-cmn.h b/libsysio/include/sysio-cmn.h deleted file mode 100644 index abb6ddd807f6735bd358971c3b79fb0d002d7fe4..0000000000000000000000000000000000000000 --- a/libsysio/include/sysio-cmn.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * System IO common information. - */ - -#if !defined(__IS_UNUSED) && defined(__GNUC__) -#define __IS_UNUSED __attribute__ ((unused)) -#else -#define __IS_UNUSED -#endif - -/* - * Define internal file-offset type and it's maximum value. - */ -#ifdef _LARGEFILE64_SOURCE -#define _SYSIO_OFF_T off64_t -#ifdef LLONG_MAX -#define _SYSIO_OFF_T_MAX (LLONG_MAX) -#else -/* - * Don't have LLONG_MAX before C99. We'll need to define it ourselves. - */ -#define _SYSIO_OFF_T_MAX (9223372036854775807LL) -#endif -#else -#define _SYSIO_OFF_T off_t -#define _SYSIO_OFF_T_MAX LONG_MAX -#endif - -/* - * Internally, all file status is carried in the 64-bit capable - * structure. - */ -#ifdef _LARGEFILE64_SOURCE -#define intnl_xtvec xtvec64 -#else -#define intnl_xtvec xtvec -#endif -struct intnl_xtvec; - -struct iovec; - -/* - * Symbol composition. 
- */ -#define _PREPEND_HELPER(p, x) \ - p ## x -#define PREPEND(p, x) \ - _PREPEND_HELPER(p, x) - -/* - * SYSIO name label macros - */ -#ifndef SYSIO_INTERFACE_NAME -#ifdef SYSIO_LABEL_NAMES -#define SYSIO_INTERFACE_NAME(x) \ - PREPEND(SYSIO_LABEL_NAMES, x) -#else -#define SYSIO_INTERFACE_NAME(x) x -#endif /* SYSIO_LABEL_NAMES */ -#endif /* !SYSIO_INTERFACE_NAME */ - -/* for debugging */ -#if 0 -#define ASSERT(cond) \ - if (!(cond)) { \ - printf("ASSERTION(" #cond ") failed: " __FILE__ ":" \ - __FUNCTION__ ":%d\n", __LINE__); \ - abort(); \ - } - -#define ERROR(fmt, a...) \ - do { \ - printf("ERROR(" __FILE__ ":%d):" fmt, __LINE__, ##a); \ - while(0) - -#else -#define ERROR(fmt) do{}while(0) -#define ASSERT do{}while(0) -#endif - -/* - * SYSIO interface frame macros - * - * + DISPLAY_BLOCK; Allocates storage on the stack for use by the set of - * macros. - * + ENTER; Performs entry point work - * + RETURN; Returns a value and performs exit point work - * - * NB: For RETURN, the arguments are the return value and value for errno. - * If the value for errno is non-zero then that value, *negated*, is set - * into errno. - */ -#define SYSIO_INTERFACE_DISPLAY_BLOCK \ - int _saved_errno; -#define SYSIO_INTERFACE_ENTER \ - do { \ - _saved_errno = errno; \ - SYSIO_ENTER; \ - } while (0) -#define SYSIO_INTERFACE_RETURN(rtn, err) \ - do { \ - SYSIO_LEAVE; \ - errno = (err) ? 
-(err) : _saved_errno; \ - return (rtn); \ - } while(0) - -/* Interface enter/leave hook functions */ -#ifdef SYSIO_TRACING -extern void *_sysio_entry_trace_q; -extern void *_sysio_exit_trace_q; - -extern void *_sysio_register_trace(void *q, - void (*)(const char *file, - const char *func, - int line, - void *data), - void *data, - void (*destructor)(void *data)); -extern void _sysio_remove_trace(void *q, void *p); -extern void _sysio_run_trace_q(void *q, - const char *file, - const char *func, - int line); -#define SYSIO_ENTER \ - do { \ - _sysio_run_trace_q(_sysio_entry_trace_q, \ - __FILE__, __func__, __LINE__); \ - } while (0) - - -#define SYSIO_LEAVE \ - do { \ - _sysio_run_trace_q(_sysio_exit_trace_q, \ - __FILE__, __func__, __LINE__); \ - } while (0) -#else -#define SYSIO_ENTER \ - do { } while (0) -#define SYSIO_LEAVE \ - do { } while (0) -#endif - -/* Accounting for IO stats; Read and write character count. */ -#if defined(REDSTORM) -#define _SYSIO_UPDACCT(w, cc) \ - do { \ - if ((cc) < 0) \ - break; \ - if (w) \ - _add_iostats(0, (size_t )(cc)); \ - else \ - _add_iostats((size_t )(cc), 0); \ - } while(0) -#else -#define _SYSIO_UPDACCT(w, cc) -#endif - -extern ssize_t _sysio_validx(const struct intnl_xtvec *xtv, size_t xtvlen, - const struct iovec *iov, size_t iovlen, - _SYSIO_OFF_T limit); -extern ssize_t _sysio_enumerate_extents(const struct intnl_xtvec *xtv, - size_t xtvlen, - const struct iovec *iov, - size_t iovlen, - ssize_t (*f)(const struct iovec *, - int, - _SYSIO_OFF_T, - ssize_t, - void *), - void *arg); -extern ssize_t _sysio_enumerate_iovec(const struct iovec *iov, - size_t count, - _SYSIO_OFF_T off, - ssize_t limit, - ssize_t (*f)(void *, - size_t, - _SYSIO_OFF_T, - void *), - void *arg); -extern ssize_t _sysio_doio(const struct intnl_xtvec *xtv, size_t xtvlen, - const struct iovec *iov, size_t iovlen, - ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *), - void *arg); diff --git a/libsysio/include/sysio-symbols.h 
b/libsysio/include/sysio-symbols.h deleted file mode 100644 index 4b7cf5611f4c577cab54d3d7fa7753fab8716688..0000000000000000000000000000000000000000 --- a/libsysio/include/sysio-symbols.h +++ /dev/null @@ -1,26 +0,0 @@ -#if defined(HAVE_ASM_WEAK_DIRECTIVE) || defined(HAVE_ASM_WEAKEXT_DIRECTIVE) -#define HAVE_WEAK_SYMBOLS -#endif - -#define STRINGOF(x) #x - -/* - * Define alias, asym, as a strong alias for symbol, sym. - */ -#define sysio_sym_strong_alias(sym, asym) \ - extern __typeof(sym) asym __attribute__((alias(STRINGOF(sym)))); - -#ifdef HAVE_WEAK_SYMBOLS - -/* - * Define alias, asym, as a strong alias for symbol, sym. - */ -#define sysio_sym_weak_alias(sym, asym) \ - extern __typeof(sym) asym __attribute__((weak, alias(STRINGOF(sym)))); -#else /* !defined(HAVE_ASM_WEAK_DIRECTIVE) */ - -/* - * Weak symbols not supported. Make it a strong alias then. - */ -#define sysio_sym_weak_alias(sym, asym) sysio_sym_strong_alias(sym, asym) -#endif diff --git a/libsysio/include/sysio.h b/libsysio/include/sysio.h deleted file mode 100644 index e64ead5e90bf53d02f8f1c297b7bae33b498b683..0000000000000000000000000000000000000000 --- a/libsysio/include/sysio.h +++ /dev/null @@ -1,295 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * System IO common information. - */ - -#include <limits.h> -#include <stdarg.h> - -#include "sysio-cmn.h" -#include "creds.h" - -#if defined(_DIRENT_H) && _DIRENT_H -/* - * Need directory access routines too. - */ -#define _DECLARE_DIR_ACCESS 1 -#else -#define _DECLARE_DIR_ACCESS 0 -#endif - -#ifndef PATH_SEPARATOR -/* - * Path separator. - */ -#define PATH_SEPARATOR '/' -#endif - -#ifndef MAX_SYMLINK -/* - * Max recursion depth allowed when resoving symbolic links. - */ -#define MAX_SYMLINK 250 -#endif - -/* - * Internally, all directory entries are carried in the 64-bit capable - * structure. - */ -#ifdef _LARGEFILE64_SOURCE -#define intnl_dirent dirent64 -#else -#define intnl_dirent dirent -#endif -struct dirent; - -/* - * Internally, all file status is carried in the 64-bit capable - * structure. 
- */ -#ifdef _LARGEFILE64_SOURCE -#define intnl_stat stat64 -#else -#define intnl_stat stat -#endif -struct stat; - -#ifdef _HAVE_STATVFS -#ifdef _LARGEFILE64_SOURCE -#define intnl_statvfs statvfs64 -#else -#define intnl_statvfs statvfs -#define INTNL_STATVFS_IS_NATURAL 1 -#endif -struct statvfs; -struct intnl_statvfs; -#endif - -struct utimbuf; - -struct intnl_stat; - -struct pnode; - -#ifdef DEFER_INIT_CWD -extern const char *_sysio_init_cwd; -#endif - -extern struct pnode *_sysio_cwd; - -extern mode_t _sysio_umask; - -extern int _sysio_init(void); -extern void _sysio_shutdown(void); -#ifdef ZERO_SUM_MEMORY -extern void _sysio_access_shutdown(void); -#endif - -#if 0 -struct _sysio_boot_ctl { - const char *onam; - const char *oarg; -}; -#endif - -extern int _sysio_boot(const char *opt, const char *arg); - -/* - * Option-value pair information. - */ -struct option_value_info { - const char *ovi_name; /* name */ - char *ovi_value; /* value */ -}; - -extern const char * _sysio_get_token(const char *buf, - int accepts, - const char *delim, - const char *ignore, - char *tbuf); -extern char * _sysio_get_args(char *buf, struct option_value_info *vec); - -#define _SYSIO_LOCAL_TIME() _sysio_local_time() - -extern time_t _sysio_local_time(void); - -#ifdef SYSIO_TRACING -extern void _sysio_cprintf(const char *fmt, ...); -#endif - -/* - * The following should be defined by the system includes, and probably are, - * but it's not illegal to have multiple externs, so long as they are the - * same. It helps when building the library in a standalone fashion. 
- */ -extern int SYSIO_INTERFACE_NAME(access)(const char *path, int amode); -extern int SYSIO_INTERFACE_NAME(chdir)(const char *path); -extern int SYSIO_INTERFACE_NAME(chmod)(const char *path, mode_t mode); -extern int SYSIO_INTERFACE_NAME(fchmod)(int fd, mode_t mode); -extern int SYSIO_INTERFACE_NAME(chown)(const char *path, uid_t owner, - gid_t group); -extern int SYSIO_INTERFACE_NAME(fchown)(int fd, uid_t owner, gid_t group); -extern int SYSIO_INTERFACE_NAME(close)(int d); -extern int SYSIO_INTERFACE_NAME(dup)(int oldfd); -extern int SYSIO_INTERFACE_NAME(dup2)(int oldfd, int newfd); -extern int SYSIO_INTERFACE_NAME(fcntl)(int fd, int cmd, ...); -extern int SYSIO_INTERFACE_NAME(fcntl64)(int fd, int cmd, ...); -extern int SYSIO_INTERFACE_NAME(fstat)(int fd, struct stat *buf); -#ifdef _LARGEFILE64_SOURCE -extern int SYSIO_INTERFACE_NAME(fstat64)(int fd, struct stat64 *buf); -extern int SYSIO_INTERFACE_NAME(lstat64)(const char *path, struct stat64 *buf); -#endif -extern int SYSIO_INTERFACE_NAME(fsync)(int fd); -extern char *SYSIO_INTERFACE_NAME(getcwd)(char *buf, size_t size); -extern off_t SYSIO_INTERFACE_NAME(lseek)(int fd, off_t offset, int whence); -#ifdef _LARGEFILE64_SOURCE -extern off64_t SYSIO_INTERFACE_NAME(lseek64)(int fd, off64_t offset, - int whence); -#endif -extern int SYSIO_INTERFACE_NAME(lstat)(const char *path, struct stat *buf); -#ifdef BSD -extern int SYSIO_INTERFACE_NAME(getdirentries)(int fd, char *buf, int nbytes , - long *basep); -#else -extern ssize_t SYSIO_INTERFACE_NAME(getdirentries)(int fd, char *buf, - size_t nbytes, off_t *basep); -#ifdef _LARGEFILE64_SOURCE -extern ssize_t SYSIO_INTERFACE_NAME(getdirentries64)(int fd, - char *buf, - size_t nbytes, - off64_t *basep); -#endif -#endif -extern int SYSIO_INTERFACE_NAME(mkdir)(const char *path, mode_t mode); -extern int SYSIO_INTERFACE_NAME(open)(const char *path, int flag, ...); -#ifdef _LARGEFILE64_SOURCE -extern int SYSIO_INTERFACE_NAME(open64)(const char *path, int flag, ...); -#endif 
-extern int SYSIO_INTERFACE_NAME(creat)(const char *path, mode_t mode); -#ifdef _LARGEFILE64_SOURCE -extern int SYSIO_INTERFACE_NAME(creat64)(const char *path, mode_t mode); -#endif -extern int SYSIO_INTERFACE_NAME(stat)(const char *path, struct stat *buf); -#ifdef _LARGEFILE64_SOURCE -extern int SYSIO_INTERFACE_NAME(stat64)(const char *path, struct stat64 *buf); -#endif -extern ssize_t SYSIO_INTERFACE_NAME(read)(int fd, void *buf, size_t count); -extern ssize_t SYSIO_INTERFACE_NAME(pread)(int fd, void *buf, size_t count, - off_t offset); -extern ssize_t SYSIO_INTERFACE_NAME(readv)(int fd, - const struct iovec *iov, - int count); -extern ssize_t SYSIO_INTERFACE_NAME(write)(int fd, - const void *buf, - size_t count); -extern ssize_t SYSIO_INTERFACE_NAME(pwrite)(int fd, - const void *buf, - size_t count, - off_t offset); -extern ssize_t SYSIO_INTERFACE_NAME(writev)(int fd, - const struct iovec *iov, - int count); -#ifdef _HAVE_STATVFS -extern int SYSIO_INTERFACE_NAME(statvfs)(const char *path, struct statvfs *buf); -#ifdef _LARGEFILE64_SOURCE -extern int SYSIO_INTERFACE_NAME(statvfs64)(const char *path, - struct statvfs64 *buf); -#endif -extern int SYSIO_INTERFACE_NAME(fstatvfs)(int fd, struct statvfs *buf); -#ifdef _LARGEFILE64_SOURCE -extern int SYSIO_INTERFACE_NAME(fstatvfs64)(int fd, struct statvfs64 *buf); -#endif -#endif -extern int SYSIO_INTERFACE_NAME(truncate)(const char *path, off_t length); -#ifdef _LARGEFILE64_SOURCE -extern int SYSIO_INTERFACE_NAME(truncate64)(const char *path, off64_t length); -#endif -extern int SYSIO_INTERFACE_NAME(ftruncate)(int fd, off_t length); -#ifdef _LARGEFILE64_SOURCE -extern int SYSIO_INTERFACE_NAME(ftruncate64)(int fd, off64_t length); -#endif -extern int SYSIO_INTERFACE_NAME(rmdir)(const char *path); -extern int SYSIO_INTERFACE_NAME(symlink)(const char *path1, const char *path2); -#ifdef HAVE_POSIX_1003_READLINK -extern ssize_t SYSIO_INTERFACE_NAME(readlink)(const char *path, -#else -extern int 
SYSIO_INTERFACE_NAME(readlink)(const char *path, -#endif - char *buf, - size_t bufsiz); -extern int SYSIO_INTERFACE_NAME(link)(const char *oldpath, const char *newpath); -extern int SYSIO_INTERFACE_NAME(unlink)(const char *path); -extern int SYSIO_INTERFACE_NAME(rename)(const char *oldpath, - const char *newpath); -extern int SYSIO_INTERFACE_NAME(fdatasync)(int fd); -extern int SYSIO_INTERFACE_NAME(ioctl)(int fd, unsigned long request, ...); -extern mode_t SYSIO_INTERFACE_NAME(umask)(mode_t mask); -extern int SYSIO_INTERFACE_NAME(mknod)(const char *path, - mode_t mode, dev_t dev); -extern int SYSIO_INTERFACE_NAME(utime)(const char *path, - const struct utimbuf *buf); -extern int SYSIO_INTERFACE_NAME(mount)(const char *source, const char *target, - const char *filesystemtype, - unsigned long mountflags, - const void *data); -extern int SYSIO_INTERFACE_NAME(umount)(const char *target); -#if _DECLARE_DIR_ACCESS -extern DIR *SYSIO_INTERFACE_NAME(opendir)(const char *name); -extern int SYSIO_INTERFACE_NAME(closedir)(DIR *dir); -extern struct dirent *SYSIO_INTERFACE_NAME(readdir)(DIR *dir); -extern int SYSIO_INTERFACE_NAME(scandir)(const char *dir, - struct dirent ***namelist, - int(*filter)(const struct dirent *), - int(*compar)(const void *, - const void *)); -#if defined(_BSD_SOURCE) || defined(_SVID_SOURCE) -extern ssize_t SYSIO_INTERFACE_NAME(getdirentries)(int fd, - char *buf, - size_t nbytes, - off_t *basep); -#endif -#endif /* _DECLARE_DIR_ACCESS */ - -#undef _DECLARE_DIR_ACCESS diff --git a/libsysio/include/xtio.h b/libsysio/include/xtio.h deleted file mode 100644 index a5798f1ca8949e78dfa828fd4562fb6e7a91da60..0000000000000000000000000000000000000000 --- a/libsysio/include/xtio.h +++ /dev/null @@ -1,339 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Extended application programmers interface for IO as found on Cray RedStorm - * and the other current SUNMos/Puma/Cougar/Catamount systems. - */ - -#ifndef _XTIO_H_ -#define _XTIO_H_ - -/* - * When compiled for use with libsysio, this allows one to move all the - * externals to a distinct namespace. When not, we want it to do nothing. - * - * NB: The choice of macro name here is dangerous. It's in the global - * namespace! We should fix that one of these days. 
- */ -#if !defined(SYSIO_INTERFACE_NAME) -#define SYSIO_INTERFACE_NAME(_n) _n -#endif - -#ifndef _IOID_T_DEFINED -#define _IOID_T_DEFINED -typedef void *ioid_t; - -#define IOID_FAIL 0 -#endif - -/* - * Structure for strided I/O. - */ -struct xtvec { -#ifndef __USE_FILE_OFFSET64 - __off_t xtv_off; /* Stride/Extent offset. */ -#else - __off64_t xtv_off; /* Stride/Extent offset. */ -#endif - size_t xtv_len; /* Stride/Extent length. */ -}; - -#ifdef __USE_LARGEFILE64 -struct xtvec64 { - __off64_t xtv_off; /* Stride/Extent offset. */ - size_t xtv_len; /* Stride/Extent length. */ -}; -#endif - -struct iovec; - -/* - * Get status of previously posted async file IO operation. - */ -extern int SYSIO_INTERFACE_NAME(iodone)(ioid_t ioid); - -/* - * Wait for completion of a previously posted asynch file IO request. - */ -extern ssize_t SYSIO_INTERFACE_NAME(iowait)(ioid_t ioid); - -/* - * Post asynch read into buffers mapped by an iovec from file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ipreadv)(int fd, - const struct iovec *iov, - size_t count, - off_t offset); - -#ifdef _LARGEFILE64_SOURCE -/* - * Post asynch read into buffers mapped by an iovec from file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ipread64v)(int fd, - const struct iovec *iov, - size_t count, - off64_t offset); -#endif - -/* - * Post asynch read into buffer from file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ipread)(int fd, - void *buf, - size_t count, - off_t offset); - -#ifdef _LARGEFILE64_SOURCE -/* - * Post asynch read into buffer from file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ipread64)(int fd, - void *buf, - size_t count, - off64_t offset); -#endif - -/* - * Read into buffers mapped by an iovec from file at given offset. - */ -extern ssize_t SYSIO_INTERFACE_NAME(preadv)(int fd, - const struct iovec *iov, - size_t count, - off_t offset); - -#ifdef _LARGEFILE64_SOURCE -/* - * Read into buffers mapped by an iovec from file at given offset. 
- */ -extern ssize_t SYSIO_INTERFACE_NAME(pread64v)(int fd, - const struct iovec *iov, - size_t count, - off64_t offset); -#endif - -/* - * Post asynch read into buffers mapped by an iovec. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ireadv)(int fd, - const struct iovec *iov, - int count); - -/* - * Read into buffer. - */ -extern ioid_t SYSIO_INTERFACE_NAME(iread)(int fd, - void *buf, - size_t count); - -/* - * Post async read into buffers mapped by iovec from regions mapped - * by xtvec. - * - * NB: An adaptation of "listio" from Argonne's PVFS. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ireadx)(int fd, - const struct iovec *iov, - size_t iov_count, - const struct xtvec *xtv, - size_t xtv_count); - -#ifdef __USE_LARGEFILE64 -/* - * Post async read into buffers mapped by iovec from regions mapped - * by xtvec. - * - * NB: An adaptation of "listio" from Argonne's PVFS. - */ -extern ioid_t SYSIO_INTERFACE_NAME(iread64x)(int fd, - const struct iovec *iov, - size_t iov_count, - const struct xtvec64 *xtv, - size_t xtv_count); -#endif - -/* - * Read into buffers mapped by iovec from regions mapped - * by xtvec. - * - * NB: An adaptation of "listio" from Argonne's PVFS. - */ -extern ssize_t SYSIO_INTERFACE_NAME(readx)(int fd, - const struct iovec *iov, - size_t iov_count, - const struct xtvec *xtv, - size_t xtv_count); - -#ifdef __USE_LARGEFILE64 -/* - * Read into buffers mapped by iovec from regions mapped - * by xtvec. - * - * NB: An adaptation of "listio" from Argonne's PVFS. - */ -extern ssize_t SYSIO_INTERFACE_NAME(read64x)(int fd, - const struct iovec *iov, - size_t iov_count, - const struct xtvec64 *xtv, - size_t xtv_count); -#endif - -/* - * Post asynch write from buffers mapped by an iovec to file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ipwritev)(int fd, - const struct iovec *iov, - size_t count, - off_t offset); -#ifdef _LARGEFILE64_SOURCE -/* - * Post asynch write from buffers mapped by an iovec to file at given offset. 
- */ -extern ioid_t SYSIO_INTERFACE_NAME(ipwrite64v)(int fd, - const struct iovec *iov, - size_t count, - off64_t offset); -#endif - -/* - * Post asynch write from buffer to file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ipwrite)(int fd, - const void *buf, - size_t count, - off_t offset); - -#ifdef _LARGEFILE64_SOURCE -/* - * Post asynch write from buffer to file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(ipwrite64)(int fd, - const void *buf, - size_t count, - off64_t offset); -#endif - -/* - * Write from buffers mapped by an iovec to file at given offset. - */ -extern ssize_t SYSIO_INTERFACE_NAME(pwritev)(int fd, - const struct iovec *iov, - size_t count, - off_t offset); - -#ifdef _LARGEFILE64_SOURCE -/* - * Write from buffers mapped by an iovec to file at given offset. - */ -extern ssize_t SYSIO_INTERFACE_NAME(pwrite64v)(int fd, - const struct iovec *iov, - size_t count, - off64_t offset); -#endif - -/* - * Post asynch write from buffer to file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(iwritev)(int fd, - const struct iovec *iov, - int count); - -/* - * Write from buffer to file at given offset. - */ -extern ioid_t SYSIO_INTERFACE_NAME(iwrite)(int fd, - const void *buf, - size_t count); - -/* - * Post async write from buffers mapped by iovec to regions mapped - * by xtvec. - * - * NB: An adaptation of "listio" from Argonne's PVFS. - */ -extern ioid_t SYSIO_INTERFACE_NAME(iwritex)(int fd, - const struct iovec *iov, - size_t iov_count, - const struct xtvec *xtv, - size_t xtv_count); - -#ifdef __USE_LARGEFILE64 -/* - * Post async write from buffers mapped by iovec to regions mapped - * by xtvec. - * - * NB: An adaptation of "listio" from Argonne's PVFS. - */ -extern ioid_t SYSIO_INTERFACE_NAME(iwrite64x)(int fd, - const struct iovec *iov, - size_t iov_count, - const struct xtvec64 *xtv, - size_t xtv_count); -#endif - -/* - * Write from buffers mapped by iovec to regions mapped - * by xtvec. 
- * - * NB: An adaptation of "listio" from Argonne's PVFS. - */ -extern ssize_t SYSIO_INTERFACE_NAME(writex)(int fd, - const struct iovec *iov, - size_t iov_count, - const struct xtvec *xtv, - size_t xtv_count); - -#ifdef __USE_LARGEFILE64 -/* - * Write from buffers mapped by iovec to regions mapped - * by xtvec. - * - * NB: An adaptation of "listio" from Argonne's PVFS. - */ -extern ssize_t SYSIO_INTERFACE_NAME(write64x)(int fd, - const struct iovec *iov, - size_t iov_count, - const struct xtvec64 *xtv, - size_t xtv_count); -#endif -#endif /* ! _XTIO_H_ */ diff --git a/libsysio/install-sh b/libsysio/install-sh deleted file mode 100755 index 6ce63b9f76bc6ab5ff08f967f52f8cf4c53a1353..0000000000000000000000000000000000000000 --- a/libsysio/install-sh +++ /dev/null @@ -1,294 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# -# This originates from X11R5 (mit/util/scripts/install.sh), which was -# later released in X11R6 (xc/config/util/install.sh) with the -# following copyright and license. -# -# Copyright (C) 1994 X Consortium -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- -# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -# Except as contained in this notice, the name of the X Consortium shall not -# be used in advertising or otherwise to promote the sale, use or other deal- -# ings in this Software without prior written authorization from the X Consor- -# tium. -# -# -# FSF changes to this file are in the public domain. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. 
- -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd=$cpprog - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd=$stripprog - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "$0: no input file specified" >&2 - exit 1 -else - : -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d "$dst" ]; then - instcmd=: - chmodcmd="" - else - instcmd=$mkdirprog - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. 
- - if [ -f "$src" ] || [ -d "$src" ] - then - : - else - echo "$0: $src does not exist" >&2 - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "$0: no destination specified" >&2 - exit 1 - else - : - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d "$dst" ] - then - dst=$dst/`basename "$src"` - else - : - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' - ' -IFS="${IFS-$defaultIFS}" - -oIFS=$IFS -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS=$oIFS - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp=$pathcomp$1 - shift - - if [ ! -d "$pathcomp" ] ; - then - $mkdirprog "$pathcomp" - else - : - fi - - pathcomp=$pathcomp/ -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd "$dst" && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dst"; else : ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dst"; else : ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dst"; else : ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dst"; else : ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename "$dst"` - else - dstfile=`basename "$dst" $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename "$dst"` - else - : - fi - -# Make a couple of temp file names in the proper directory. 
- - dsttmp=$dstdir/_inst.$$_ - rmtmp=$dstdir/_rm.$$_ - -# Trap to clean up temp files at exit. - - trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0 - trap '(exit $?); exit' 1 2 13 15 - -# Move or copy the file name to the temp name - - $doit $instcmd "$src" "$dsttmp" && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd "$dsttmp"; else :;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd "$dsttmp"; else :;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd "$dsttmp"; else :;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd "$dsttmp"; else :;fi && - -# Now remove or move aside any old file at destination location. We try this -# two ways since rm can't unlink itself on some systems and the destination -# file might be busy for other reasons. In this case, the final cleanup -# might fail but the new file should still install successfully. - -{ - if [ -f "$dstdir/$dstfile" ] - then - $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null || - $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null || - { - echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 - (exit 1); exit - } - else - : - fi -} && - -# Now rename the file to the real destination. - - $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" - -fi && - -# The final little trick to "correctly" pass the exit status to the exit trap. - -{ - (exit 0); exit -} diff --git a/libsysio/misc/gdb-libsysio b/libsysio/misc/gdb-libsysio deleted file mode 100644 index dd3f61355828dc08d4aa2f83aec5caf00d9052f1..0000000000000000000000000000000000000000 --- a/libsysio/misc/gdb-libsysio +++ /dev/null @@ -1,127 +0,0 @@ -# This Cplant(TM) source code is the property of Sandia National -# Laboratories. 
-# -# This Cplant(TM) source code is copyrighted by Sandia National -# Laboratories. -# -# The redistribution of this Cplant(TM) source code is subject to the -# terms of the GNU Lesser General Public License -# (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) -# -# Cplant(TM) Copyright 1998-2003 Sandia Corporation. -# Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive -# license for use of this work by or on behalf of the US Government. -# Export of this program may require a license from the United States -# Government. - -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# Questions or comments about this library should be sent to: -# -# Lee Ward -# Sandia National Laboratories, New Mexico -# P.O. 
Box 5800 -# Albuquerque, NM 87185-1110 -# -# lee@sandia.gov - -# -# Useful commands for debugging libsysio in gdb -# - -define x_dump_pbnode - printf "%p: ", $arg0 - if $arg0->pb_name.name - printf " \"%s\"", \ - $arg0->pb_name.name - else - printf " <NULL>" - end - printf " aliases:[" - set $x_p = $arg0->pb_aliases.lh_first - while $x_p - printf "<%p r:%d mnt:%p>", \ - $x_p, \ - $x_p->p_ref, \ - $x_p->p_mount - set $x_p = $x_p->p_links.le_next - end - printf "]\n" -end -document x_dump_pbnode -Dump path-base node and it's aliases - -Usage: x_dump_pbnode <pbnode> -end - -define __x_dump_pnode - printf "%spnode %p, mount %p, base: ", $arg0, $arg1, $arg1->p_mount - x_dump_pbnode $arg1->p_base -end - -define x_dump_pnode - __x_dump_pnode "" $arg0 -end -document x_dump_pnode -Dump path node information - -Usage: x_dump_pnode <pnode> -end - -define x_dump_mount - printf "MOUNT %p: root pnode %p, covers %p\n", \ - $arg0, $arg0->mnt_root, $arg0->mnt_covers - set $_x_dump_mount_var_pno = _sysio_pnodes->tqh_first - while $_x_dump_mount_var_pno != 0 -printf "%p, %p\n", $_x_dump_mount_var_pno, $arg0 - if $_x_dump_mount_var_pno->p_mount == $arg0 - __x_dump_pnode " " $_x_dump_mount_var_pno - end - set $_x_dump_mount_var_pno = \ - $_x_dump_mount_var_pno->p_nodes.tqe_next - end -end -document x_dump_mount -Dump single mount record information - -Usage: x_dump_mount <mnt> -end - -define x_dump_mounts - set $__x_dump_mounts_var_mnt = mounts.lh_first - while $__x_dump_mounts_var_mnt - x_dump_mount $__x_dump_mounts_var_mnt - set $__x_dump_mounts_var_mnt = \ - $__x_dump_mounts_var_mnt->mnt_link.le_next - end -end -document x_dump_mounts -Dump the contents of the libsysio mount table - -Usage: x_dump_mounts -end - -define x_dump_pnodes - set $_x_dump_pnodes_var_pno = _sysio_pnodes.tqh_first - while $_x_dump_pnodes_var_pno - x_dump_pnode $_x_dump_pnodes_var_pno - set $_x_dump_pnodes_var_pno = \ - $_x_dump_pnodes_var_pno->p_nodes.tqe_next - end -end - -br _sysio_unmount_all -run -r 
/tmp/lee foo bar -x_dump_pnodes diff --git a/libsysio/misc/init-env.sh b/libsysio/misc/init-env.sh deleted file mode 100644 index ae1f88153626153f4044249774a9feeb530d6bb8..0000000000000000000000000000000000000000 --- a/libsysio/misc/init-env.sh +++ /dev/null @@ -1,40 +0,0 @@ -# -# Source this file. It will craft a usable name space for your testing. -# -# Lee; Sun Feb 8 18:02:16 EST 2004 -# -# Note: We really should support symlinks someday. -# -unset _root_flags -unset _extras -if [ "x${SYSIO_AUTOMOUNT}" == "xyes" ]; then - _root_flags="2" - # - # Add a /auto directory for automounted file systems. We - # craft one automount that mounts /usr/home from the native - # file system. Further automounts in the sub-mounts are not enabled. - # - _extras=" \ - {mnt, dev=\"incore:0755\",dir=\"/mnt\",fl=2} \ - {creat, ft=dir,nm=\"/mnt/home\",pm=04755} \ - {creat, ft=file,nm=\"/mnt/home/.mount\",pm=0600, \ - str=\"native:/home\"} \ - " -fi -export SYSIO_NAMESPACE="\ - {mnt, dev=\"native:/\",dir=/,fl=${_root_flags:-0}} \ - {mnt, dev=\"incore:0755\",dir=\"/dev\"} \ - {creat, ft=chr,nm=\"/dev/stdin\",pm=0400,mm=0+0} \ - {creat, ft=chr,nm=\"/dev/stdout\",pm=0200,mm=0+1} \ - {creat, ft=chr,nm=\"/dev/stderr\",pm=0200,mm=0+2} \ - {creat, ft=dir,nm=\"/dev/fd\",pm=0755} \ - {creat, ft=chr,nm=\"/dev/fd/0\",pm=0400,mm=0+0} \ - {open, nm=\"/dev/fd/0\",fd=0,m=0} \ - {creat, ft=chr,nm=\"/dev/fd/1\",pm=0200,mm=0+1} \ - {open, nm=\"/dev/fd/1\",fd=1,m=1} \ - {creat, ft=chr,nm=\"/dev/fd/2\",pm=0200,mm=0+2} \ - {open, nm=\"/dev/fd/2\",fd=2,m=1} \ - ${_extras} \ -" -unset _root_flags -unset _extras diff --git a/libsysio/missing b/libsysio/missing deleted file mode 100755 index fc54c64ec969d9c1630335d8a88c5d72d3ca0856..0000000000000000000000000000000000000000 --- a/libsysio/missing +++ /dev/null @@ -1,336 +0,0 @@ -#! /bin/sh -# Common stub for a few missing GNU programs while installing. -# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003 Free Software Foundation, Inc. 
-# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -if test $# -eq 0; then - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 -fi - -run=: - -# In the cases where this matters, `missing' is being run in the -# srcdir already. -if test -f configure.ac; then - configure_ac=configure.ac -else - configure_ac=configure.in -fi - -case "$1" in ---run) - # Try to run requested program, and just exit if it succeeds. - run= - shift - "$@" && exit 0 - ;; -esac - -# If it does not exist, or fails to run (possibly an outdated version), -# try to emulate it. -case "$1" in - - -h|--h|--he|--hel|--help) - echo "\ -$0 [OPTION]... PROGRAM [ARGUMENT]... - -Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an -error status if there is no known handling for PROGRAM. 
- -Options: - -h, --help display this help and exit - -v, --version output version information and exit - --run try to run the given command, and emulate it if it fails - -Supported PROGRAM values: - aclocal touch file \`aclocal.m4' - autoconf touch file \`configure' - autoheader touch file \`config.h.in' - automake touch all \`Makefile.in' files - bison create \`y.tab.[ch]', if possible, from existing .[ch] - flex create \`lex.yy.c', if possible, from existing .c - help2man touch the output file - lex create \`lex.yy.c', if possible, from existing .c - makeinfo touch the output file - tar try tar, gnutar, gtar, then tar without non-portable flags - yacc create \`y.tab.[ch]', if possible, from existing .[ch]" - ;; - - -v|--v|--ve|--ver|--vers|--versi|--versio|--version) - echo "missing 0.4 - GNU automake" - ;; - - -*) - echo 1>&2 "$0: Unknown \`$1' option" - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 - ;; - - aclocal*) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`acinclude.m4' or \`${configure_ac}'. You might want - to install the \`Automake' and \`Perl' packages. Grab them from - any GNU archive site." - touch aclocal.m4 - ;; - - autoconf) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`${configure_ac}'. You might want to install the - \`Autoconf' and \`GNU m4' packages. Grab them from any GNU - archive site." - touch configure - ;; - - autoheader) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`acconfig.h' or \`${configure_ac}'. 
You might want - to install the \`Autoconf' and \`GNU m4' packages. Grab them - from any GNU archive site." - files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` - test -z "$files" && files="config.h" - touch_files= - for f in $files; do - case "$f" in - *:*) touch_files="$touch_files "`echo "$f" | - sed -e 's/^[^:]*://' -e 's/:.*//'`;; - *) touch_files="$touch_files $f.in";; - esac - done - touch $touch_files - ;; - - automake*) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. - You might want to install the \`Automake' and \`Perl' packages. - Grab them from any GNU archive site." - find . -type f -name Makefile.am -print | - sed 's/\.am$/.in/' | - while read f; do touch "$f"; done - ;; - - autom4te) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is needed, and you do not seem to have it handy on your - system. You might have modified some files without having the - proper tools for further handling them. - You can get \`$1' as part of \`Autoconf' from any GNU - archive site." - - file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` - test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` - if test -f "$file"; then - touch $file - else - test -z "$file" || exec >$file - echo "#! /bin/sh" - echo "# Created by GNU Automake missing as a replacement of" - echo "# $ $@" - echo "exit 0" - chmod +x $file - exit 1 - fi - ;; - - bison|yacc) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.y' file. You may need the \`Bison' package - in order for those modifications to take effect. You can get - \`Bison' from any GNU archive site." 
- rm -f y.tab.c y.tab.h - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.y) - SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.c - fi - SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.h - fi - ;; - esac - fi - if [ ! -f y.tab.h ]; then - echo >y.tab.h - fi - if [ ! -f y.tab.c ]; then - echo 'main() { return 0; }' >y.tab.c - fi - ;; - - lex|flex) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.l' file. You may need the \`Flex' package - in order for those modifications to take effect. You can get - \`Flex' from any GNU archive site." - rm -f lex.yy.c - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.l) - SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" lex.yy.c - fi - ;; - esac - fi - if [ ! -f lex.yy.c ]; then - echo 'main() { return 0; }' >lex.yy.c - fi - ;; - - help2man) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a dependency of a manual page. You may need the - \`Help2man' package in order for those modifications to take - effect. You can get \`Help2man' from any GNU archive site." - - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` - fi - if [ -f "$file" ]; then - touch $file - else - test -z "$file" || exec >$file - echo ".ab help2man is required to generate this page" - exit 1 - fi - ;; - - makeinfo) - if test -z "$run" && (makeinfo --version) > /dev/null 2>&1; then - # We have makeinfo, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. 
You should only need it if - you modified a \`.texi' or \`.texinfo' file, or any other file - indirectly affecting the aspect of the manual. The spurious - call might also be the consequence of using a buggy \`make' (AIX, - DU, IRIX). You might want to install the \`Texinfo' package or - the \`GNU make' package. Grab either from any GNU archive site." - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` - file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` - fi - touch $file - ;; - - tar) - shift - if test -n "$run"; then - echo 1>&2 "ERROR: \`tar' requires --run" - exit 1 - fi - - # We have already tried tar in the generic part. - # Look for gnutar/gtar before invocation to avoid ugly error - # messages. - if (gnutar --version > /dev/null 2>&1); then - gnutar "$@" && exit 0 - fi - if (gtar --version > /dev/null 2>&1); then - gtar "$@" && exit 0 - fi - firstarg="$1" - if shift; then - case "$firstarg" in - *o*) - firstarg=`echo "$firstarg" | sed s/o//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - case "$firstarg" in - *h*) - firstarg=`echo "$firstarg" | sed s/h//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - fi - - echo 1>&2 "\ -WARNING: I can't seem to be able to run \`tar' with the given arguments. - You may want to install GNU tar or Free paxutils, or check the - command line arguments." - exit 1 - ;; - - *) - echo 1>&2 "\ -WARNING: \`$1' is needed, and you do not seem to have it handy on your - system. You might have modified some files without having the - proper tools for further handling them. Check the \`README' file, - it often tells you about the needed prerequisites for installing - this package. You may also peek at any GNU archive site, in case - some other package would contain this missing \`$1' program." 
- exit 1 - ;; -esac - -exit 0 diff --git a/libsysio/mkinstalldirs b/libsysio/mkinstalldirs deleted file mode 100755 index d2d5f21b611235316317197d3a32c2dff5897a6f..0000000000000000000000000000000000000000 --- a/libsysio/mkinstalldirs +++ /dev/null @@ -1,111 +0,0 @@ -#! /bin/sh -# mkinstalldirs --- make directory hierarchy -# Author: Noah Friedman <friedman@prep.ai.mit.edu> -# Created: 1993-05-16 -# Public domain - -errstatus=0 -dirmode="" - -usage="\ -Usage: mkinstalldirs [-h] [--help] [-m mode] dir ..." - -# process command line arguments -while test $# -gt 0 ; do - case $1 in - -h | --help | --h*) # -h for help - echo "$usage" 1>&2 - exit 0 - ;; - -m) # -m PERM arg - shift - test $# -eq 0 && { echo "$usage" 1>&2; exit 1; } - dirmode=$1 - shift - ;; - --) # stop option processing - shift - break - ;; - -*) # unknown option - echo "$usage" 1>&2 - exit 1 - ;; - *) # first non-opt arg - break - ;; - esac -done - -for file -do - if test -d "$file"; then - shift - else - break - fi -done - -case $# in - 0) exit 0 ;; -esac - -case $dirmode in - '') - if mkdir -p -- . 2>/dev/null; then - echo "mkdir -p -- $*" - exec mkdir -p -- "$@" - fi - ;; - *) - if mkdir -m "$dirmode" -p -- . 2>/dev/null; then - echo "mkdir -m $dirmode -p -- $*" - exec mkdir -m "$dirmode" -p -- "$@" - fi - ;; -esac - -for file -do - set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` - shift - - pathcomp= - for d - do - pathcomp="$pathcomp$d" - case $pathcomp in - -*) pathcomp=./$pathcomp ;; - esac - - if test ! -d "$pathcomp"; then - echo "mkdir $pathcomp" - - mkdir "$pathcomp" || lasterr=$? - - if test ! -d "$pathcomp"; then - errstatus=$lasterr - else - if test ! -z "$dirmode"; then - echo "chmod $dirmode $pathcomp" - lasterr="" - chmod "$dirmode" "$pathcomp" || lasterr=$? - - if test ! 
-z "$lasterr"; then - errstatus=$lasterr - fi - fi - fi - fi - - pathcomp="$pathcomp/" - done -done - -exit $errstatus - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# End: -# mkinstalldirs ends here diff --git a/libsysio/src/.cvsignore b/libsysio/src/.cvsignore deleted file mode 100644 index ec96903b9d05c45b7fb9e6f057c456661be09b81..0000000000000000000000000000000000000000 --- a/libsysio/src/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -.deps -.dirstamp diff --git a/libsysio/src/access.c b/libsysio/src/access.c deleted file mode 100644 index 80e8fcd9ed005ea528c5cad7a339ad55ea212153..0000000000000000000000000000000000000000 --- a/libsysio/src/access.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <errno.h> -#include <unistd.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "mount.h" -#include "fs.h" -#include "inode.h" -#include "sysio-symbols.h" - -/* - * Use a persistent buffer for gids. No, not a cache. We just want to - * avoid calling malloc over, and over, and... - */ -static gid_t *gids = NULL; -static int gidslen = 0; - -/* - * Check given access type on given inode. - */ -int -_sysio_check_permission(struct pnode *pno, struct creds *crp, int amode) -{ - mode_t mask; - struct inode *ino; - int err; - struct intnl_stat *stat; - gid_t *gids; - int ngids; - int group_matched; - - /* - * Check amode. - */ - if ((amode & (R_OK|W_OK|X_OK)) != amode) - return -EINVAL; - - if (!amode) - return 0; - - mask = 0; - if (amode & R_OK) - mask |= S_IRUSR; - if (amode & W_OK) - mask |= S_IWUSR; - if (amode & X_OK) - mask |= S_IXUSR; - - ino = pno->p_base->pb_ino; - assert(ino); - - err = -EACCES; /* assume error */ - stat = &ino->i_stbuf; - do { - /* - * Owner? - */ - if (stat->st_uid == crp->creds_uid) { - if ((stat->st_mode & mask) == mask) - err = 0; - break; - } - - /* - * Group? - */ - mask >>= 3; - group_matched = 0; - gids = crp->creds_gids; - ngids = crp->creds_ngids; - while (ngids) { - ngids--; - if (stat->st_gid == *gids++) { - group_matched = 1; - if ((stat->st_mode & mask) == mask) - err = 0; - } - } - if (group_matched) - break; - - /* - * Other? 
- */ - mask >>= 3; - if ((stat->st_mode & mask) == mask) - err = 0; - } while (0); - if (err) - return err; - - /* - * Check for RO access to the file due to mount - * options. - */ - if (amode & W_OK && IS_RDONLY(pno)) - return -EROFS; - - return 0; -} - -/* - * Cache groups. - */ -static int -_sysio_ldgroups(gid_t gid0, gid_t **gidsp, int *gidslenp) -{ - int n, i; - void *p; - - n = *gidslenp; - if (n < 8) { - *gidsp = NULL; - n = 8; - } - for (;;) { - /* - * This is far more expensive than I would like. Each time - * called it has to go to some length to acquire the - * current uid and groups membership. We can't just cache - * the result, either. The caller could have altered something - * asynchronously. Wish we had easy access to this info. - */ - if (n > *gidslenp) { - p = realloc(*gidsp, (size_t )n * sizeof(gid_t)); - if (!p) - return -errno; - *gidsp = p; - *gidslenp = n; - } - (*gidsp)[0] = gid0; - i = getgroups(n - 1, *gidsp + 1); - if (i < 0) { - if (errno != EINVAL) - return -errno; - if (INT_MAX / 2 < n) - return -EINVAL; - n *= 2; - continue; - } - break; - } - return i; -} - -/* - * Get current credentials. - */ -static int -_sysio_ldcreds(uid_t uid, gid_t gid, struct creds *crp) -{ - int n; - - n = _sysio_ldgroups(gid, &gids, &gidslen); - if (n < 0) - return n; - crp->creds_uid = uid; - crp->creds_gids = gids; - crp->creds_ngids = n; - - return 0; -} - -static int -_sysio_getcreds(struct creds *crp) -{ - - return _sysio_ldcreds(getuid(), getgid(), crp); -} - -/* - * Determine if a given access is permitted to a given file. - */ -int -_sysio_permitted(struct pnode *pno, int amode) -{ - struct creds cr; - int err; - - err = _sysio_ldcreds(geteuid(), getegid(), &cr); - if (err < 0) - return err; - err = _sysio_check_permission(pno, &cr, amode); - return err; -} - -#ifdef ZERO_SUM_MEMORY -/* - * Clean up persistent resource on shutdown. 
- */ -void -_sysio_access_shutdown() -{ - - if (gids) - free(gids); - gids = NULL; - gidslen = 0; -} -#endif - -int -SYSIO_INTERFACE_NAME(access)(const char *path, int amode) -{ - struct intent intent; - int err; - struct pnode *pno; - struct creds cr; - - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - - INTENT_INIT(&intent, INT_GETATTR, NULL, NULL); - err = _sysio_namei(_sysio_cwd, path, 0, &intent, &pno); - if (err) - SYSIO_INTERFACE_RETURN(-1, err); - err = _sysio_ldcreds(geteuid(), getegid(), &cr); - if (err < 0) - goto out; - err = - _sysio_check_permission(pno, &cr, amode); -out: - P_RELE(pno); - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __access -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(access), - PREPEND(__, SYSIO_INTERFACE_NAME(access))) -#endif diff --git a/libsysio/src/chdir.c b/libsysio/src/chdir.c deleted file mode 100644 index 3f5c90028b3cbfd3c2f9965fecf58fde682a7298..0000000000000000000000000000000000000000 --- a/libsysio/src/chdir.c +++ /dev/null @@ -1,278 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * ############################################################################# - * # - * # This Cplant(TM) source code is the property of Sandia National - * # Laboratories. - * # - * # This Cplant(TM) source code is copyrighted by Sandia National - * # Laboratories. 
- * # - * # The redistribution of this Cplant(TM) source code is subject to the - * # terms of the GNU Lesser General Public License - * # (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * # - * # Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * # Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * # license for use of this work by or on behalf of the US Government. - * # Export of this program may require a license from the United States - * # Government. - * # - * ############################################################################# - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <string.h> -#include <limits.h> -#include <errno.h> -#include <assert.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "mount.h" -#include "file.h" -#include "sysio-symbols.h" - -#ifdef DEFER_INIT_CWD -const char *_sysio_init_cwd = NULL; -#endif - -struct pnode *_sysio_cwd = NULL; - -/* - * Change to directory specified by the given pnode. - */ -int -_sysio_p_chdir(struct pnode *pno) -{ - int err; - - /* - * Revalidate the pnode, and ensure it's an accessable directory - */ - err = _sysio_p_validate(pno, NULL, NULL); - if (err) - return err; - if (!(pno->p_base->pb_ino && - S_ISDIR(pno->p_base->pb_ino->i_stbuf.st_mode))) - return -ENOTDIR; - if ((err = _sysio_permitted(pno, X_OK)) != 0) - return err; - - /* - * Release old if set. - */ - if (_sysio_cwd) - P_RELE(_sysio_cwd); - - /* - * Finally, change to the new. - */ - _sysio_cwd = pno; - - return 0; -} - -int -SYSIO_INTERFACE_NAME(chdir)(const char *path) -{ - int err; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = _sysio_namei(_sysio_cwd, path, 0, NULL, &pno); - if (err) - SYSIO_INTERFACE_RETURN(-1, err); - - err = _sysio_p_chdir(pno); - if (err) - P_RELE(pno); - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __chdir -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(chdir), - PREPEND(__, SYSIO_INTERFACE_NAME(chdir))) -#endif - -/* - * Return path tracked by the path ancestor chain. - * - * If the buf pointer is NULL, a buffer large enough to hold the path - * is allocated from the heap. 
- */ - -static int -_sysio_p_path(struct pnode *pno, char **buf, size_t size) -{ - struct pnode *cur; - size_t len; - size_t n; - char *cp; - - cur = pno; - - if (!size && buf && *buf) - return -EINVAL; - - /* - * Walk up the tree to the root, summing the component name - * lengths and counting the vertices. - */ - len = 0; - n = 0; - do { - /* - * If this is a covering path-node then the name should be - * the *covered* nodes name, not this one unless we are at - * the root of the name-space. - */ - while (pno == pno->p_mount->mnt_root && pno != pno->p_parent ) - pno = pno->p_mount->mnt_covers; - - /* - * Add length of this component to running sum and - * account for this vertex. - */ - assert((len >= pno->p_base->pb_name.len && - (size_t )~0 - pno->p_base->pb_name.len > len) || - (size_t )~0 - len > pno->p_base->pb_name.len); - len += pno->p_base->pb_name.len; - n++; - assert(n); - pno = pno->p_parent; - } while (pno != pno->p_parent); - - if (!*buf) - size = len + n + 1; - if (len >= size || n >= size - len) - return -ERANGE; - if (!*buf) { - /* - * Allocate path buffer from the heap. - */ - *buf = malloc(size * sizeof(char)); - if (!*buf) - return -ENOMEM; - } - - /* - * Fill in the path buffer. - */ - pno = cur; - cp = *buf + len + n; - *cp = '\0'; /* NUL terminate */ - do { - /* - * If this is a covering path-node then the name should be - * the *covered* nodes name, not this one unless we are at - * the root of the name-space. - */ - while (pno == pno->p_mount->mnt_root && pno != pno->p_parent ) - pno = pno->p_mount->mnt_covers; - - /* - * Add component and separator. 
- */ - cp -= pno->p_base->pb_name.len; - (void )memcpy(cp, pno->p_base->pb_name.name, - pno->p_base->pb_name.len); - - *--cp = PATH_SEPARATOR; - pno = pno->p_parent; - } while (pno != pno->p_parent); - - return 0; -} - -char * -SYSIO_INTERFACE_NAME(getcwd)(char *buf, size_t size) -{ - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; -#ifdef DEFER_INIT_CWD - if (!_sysio_cwd) { - struct pnode *pno; - - /* - * Can no longer defer initialization of the current working - * directory. Force namei to make it happen now. - */ - if (_sysio_namei(NULL, ".", 0, NULL, &pno) != 0) - abort(); - P_RELE(pno); - } -#endif - err = _sysio_p_path(_sysio_cwd, &buf, buf ? size : 0); - SYSIO_INTERFACE_RETURN(err ? NULL : buf, err); -} - -#ifdef __GLIBC__ -#undef __getcwd -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(getcwd), - PREPEND(__, SYSIO_INTERFACE_NAME(getcwd))) -#endif - -#if defined(PATH_MAX) && !(defined(REDSTORM)) -char * -SYSIO_INTERFACE_NAME(getwd)(char *buf) -{ - - if (!buf) { - errno = EFAULT; - return NULL; - } - - return SYSIO_INTERFACE_NAME(getcwd)(buf, PATH_MAX); -} -#endif diff --git a/libsysio/src/chmod.c b/libsysio/src/chmod.c deleted file mode 100644 index 936dec4446741da4f9682e6895cfa2d1e1d864e7..0000000000000000000000000000000000000000 --- a/libsysio/src/chmod.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" -#include "sysio-symbols.h" - -static int -do_chmod(struct pnode *pno, struct inode *ino, mode_t mode) -{ - int err; - struct intnl_stat stbuf; - unsigned mask; - - (void )memset(&stbuf, 0, sizeof(struct intnl_stat)); - stbuf.st_mode = mode & 07777; - mask = SETATTR_MODE; - err = _sysio_setattr(pno, ino, mask, &stbuf); - return err; -} - -int -SYSIO_INTERFACE_NAME(chmod)(const char *path, mode_t mode) -{ - int err; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = _sysio_namei(_sysio_cwd, path, 0, NULL, &pno); - if (err) - goto out; - err = do_chmod(pno, pno->p_base->pb_ino, mode); - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef REDSTORM -#undef __chmod -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(chmod), - PREPEND(__, SYSIO_INTERFACE_NAME(chmod))) -#endif - -int -SYSIO_INTERFACE_NAME(fchmod)(int fd, mode_t mode) -{ - int err; - struct file *fil; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = 0; - fil = _sysio_fd_find(fd); - if (!fil) { - err = -EBADF; - goto out; - } - - err = do_chmod(NULL, fil->f_ino, mode); -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __fchmod -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(fchmod), - PREPEND(__, SYSIO_INTERFACE_NAME(fchmod))) -#endif diff --git a/libsysio/src/chown.c b/libsysio/src/chown.c deleted file mode 100644 index 827a815ad3a0abf1072a40305d099b40e9eae3ae..0000000000000000000000000000000000000000 --- a/libsysio/src/chown.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" -#include "sysio-symbols.h" - -static int -_do_chown(struct pnode *pno, struct inode *ino, uid_t owner, gid_t group) -{ - int err; - struct intnl_stat stbuf; - unsigned mask; - - (void )memset(&stbuf, 0, sizeof(struct intnl_stat)); - mask = 0; - if (owner != (uid_t )-1) { - stbuf.st_uid = owner; - mask |= SETATTR_UID; - } - if (group != (gid_t )-1) { - stbuf.st_gid = group; - mask |= SETATTR_GID; - } - err = _sysio_setattr(pno, ino, mask, &stbuf); - return err; -} - -int -SYSIO_INTERFACE_NAME(chown)(const char *path, uid_t owner, gid_t group) -{ - int err; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = _sysio_namei(_sysio_cwd, path, 0, NULL, &pno); - if (err) - goto out; - - err = _do_chown(pno, pno->p_base->pb_ino, owner, group); - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef REDSTORM -#undef __chown -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(chown), - PREPEND(__, SYSIO_INTERFACE_NAME(chown))) -#endif - -int -SYSIO_INTERFACE_NAME(fchown)(int fd, uid_t owner, gid_t group) -{ - int err; - struct file *fil; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = 0; - fil = _sysio_fd_find(fd); - if (!fil) { - err = -EBADF; - goto out; - } - - err = _do_chown(NULL, fil->f_ino, owner, group); -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __fchown -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(fchown), - PREPEND(__, SYSIO_INTERFACE_NAME(fchown))) -#endif - diff --git a/libsysio/src/dev.c b/libsysio/src/dev.c deleted file mode 100644 index 7fca77a1689ec1e51949ff5237340b74530d4872..0000000000000000000000000000000000000000 --- a/libsysio/src/dev.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "dev.h" - -const struct inode_ops _sysio_nodev_ops = { - _sysio_nodev_inop_lookup, - _sysio_nodev_inop_getattr, - _sysio_nodev_inop_setattr, - _sysio_nodev_filldirentries, - _sysio_nodev_inop_mkdir, - _sysio_nodev_inop_rmdir, - _sysio_nodev_inop_symlink, - _sysio_nodev_inop_readlink, - _sysio_nodev_inop_open, - _sysio_nodev_inop_close, - _sysio_nodev_inop_link, - _sysio_nodev_inop_unlink, - _sysio_nodev_inop_rename, - _sysio_nodev_inop_read, - _sysio_nodev_inop_write, - _sysio_nodev_inop_pos, - _sysio_nodev_inop_iodone, - _sysio_nodev_inop_fcntl, - _sysio_nodev_inop_sync, - _sysio_nodev_inop_datasync, - _sysio_nodev_inop_ioctl, - _sysio_nodev_inop_mknod, -#ifdef _HAVE_STATVFS - _sysio_nodev_inop_statvfs, -#endif - _sysio_nodev_inop_gone -}; - -/* - * Support for pseudo-devices. 
- */ - -struct device { - const char *dev_name; - struct inode_ops dev_ops; -}; - -static struct device cdev[128]; - -int -_sysio_dev_init() -{ - unsigned major; - - major = 0; - do { - cdev[major].dev_name = NULL; - cdev[major].dev_ops = _sysio_nodev_ops; - } while (++major < sizeof(cdev) / sizeof(struct device)); - - return 0; -} - -/* - * Allocate major dev number in the dynamic range [128-255]. - */ -dev_t -_sysio_dev_alloc() -{ - unsigned short major; - static unsigned char c_major = 128; - - assert(c_major); - major = c_major++; - return SYSIO_MKDEV(major, 0); -} - -static int -dev_register(struct device devtbl[], - int major, - const char *name, - struct inode_ops *ops) -{ - - assert(major < 128); - - if (major < 0) { - major = sizeof(cdev) / sizeof(struct device); - while (major--) { - if (!devtbl[major].dev_name) - break; - } - } - if (major < 0) - return -ENXIO; /* I dunno, what? */ - if (devtbl[major].dev_name) - return -EEXIST; - devtbl[major].dev_name = name; - devtbl[major].dev_ops = *ops; - - return major; -} - -int -_sysio_char_dev_register(int major, const char *name, struct inode_ops *ops) -{ - - return dev_register(cdev, major, name, ops); -} - -struct inode_ops * -_sysio_dev_lookup(mode_t mode, dev_t dev) -{ - struct device *devtbl; - dev_t major; - - if (S_ISCHR(mode) || S_ISFIFO(mode)) - devtbl = cdev; - else - return (struct inode_ops *)&_sysio_nodev_ops; - - major = SYSIO_MAJOR_DEV(dev); - if (!(major < 128) || !devtbl[major].dev_name) - return (struct inode_ops *)&_sysio_nodev_ops; - - return &devtbl[major].dev_ops; -} diff --git a/libsysio/src/dup.c b/libsysio/src/dup.c deleted file mode 100644 index ba3d24c4e432b62ebae7c7a1c264131fb3bc6df6..0000000000000000000000000000000000000000 --- a/libsysio/src/dup.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "file.h" -#include "sysio-symbols.h" - -int -SYSIO_INTERFACE_NAME(dup2)(int oldfd, int newfd) -{ - int fd; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - if (newfd < 0) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - fd = _sysio_fd_dup(oldfd, newfd, 1); - SYSIO_INTERFACE_RETURN(fd < 0 ? -1 : fd, fd < 0 ? 
fd : 0); -} - -#ifdef REDSTORM -#undef __dup2 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(dup2), - PREPEND(__, SYSIO_INTERFACE_NAME(dup2))) -#endif - -int -SYSIO_INTERFACE_NAME(dup)(int oldfd) -{ - int fd; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fd = _sysio_fd_dup(oldfd, -1, 0); - SYSIO_INTERFACE_RETURN(fd < 0 ? -1 : fd, fd < 0 ? fd : 0); -} - -#ifdef __GLIBC__ -#undef __dup -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(dup), - PREPEND(__, SYSIO_INTERFACE_NAME(dup))) -#endif diff --git a/libsysio/src/fcntl.c b/libsysio/src/fcntl.c deleted file mode 100644 index b779d7089ea3a3a8728c29655ff105999d251f9d..0000000000000000000000000000000000000000 --- a/libsysio/src/fcntl.c +++ /dev/null @@ -1,301 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2005 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <string.h> -#include <stdlib.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" - -#include "sysio-symbols.h" - -#ifdef HAVE_LUSTRE_HACK -#include <syscall.h> -#include <native.h> -#endif - -#ifdef HAVE_LUSTRE_HACK -static int -_sysio_lustre_fcntl(int fd, int cmd, va_list ap, int *rtn) -{ - long arg = va_arg(ap, long); - - *rtn = syscall(SYSIO_SYS_fcntl, fd, cmd, arg); - return *rtn == -1 ? -errno : 0; -} -#endif - -static int -_sysio_fcntl_raw_call(struct inode *ino, int *r, int cmd, ...) -{ - va_list ap; - int err; - - va_start(ap, cmd); - err = ino->i_ops.inop_fcntl(ino, cmd, ap, r); - va_end(ap); - return err; -} - -/* - * Convert offsets to absolute, when appropriate, and call appropriate driver - * to complete the fcntl lock function. If successful, convert - * returned values back to appropriate form. - */ -static int -_sysio_fcntl_lock(struct file *fil, int cmd, struct _SYSIO_FLOCK *fl) -{ - struct _SYSIO_FLOCK flock; - _SYSIO_OFF_T pos; - int err; - int rtn; - - /* - * The drivers will not have a clue as to the - * current position of the file pointer. We need to - * convert relative whence values to absolute - * file adresses for them, then. - */ - flock = *fl; - switch (flock.l_whence) { - case SEEK_SET: - /* - * At least parameter check this one, too. 
- */ - case SEEK_CUR: - case SEEK_END: - pos = - _sysio_lseek_prepare(fil, - flock.l_start, - flock.l_whence, - _SEEK_MAX(fil)); - if (pos < 0) - return (int )pos; - flock.l_start = pos; - flock.l_whence = SEEK_SET; - break; - default: - return -EINVAL; - } - err = - _sysio_fcntl_raw_call(fil->f_ino, &rtn, cmd, &flock); - if (err) - return err; - /* - * Ugh, convert back to relative form. - */ - switch (fl->l_whence) { - case SEEK_SET: - break; - case SEEK_CUR: - fl->l_start = flock.l_start; - fl->l_start -= fil->f_pos; - break; - case SEEK_END: - fl->l_start = flock.l_start; - fl->l_start -= - fil->f_ino->i_stbuf.st_size; - break; - default: - abort(); - } - /* - * Return success. - */ - return 0; -} - -static int -_sysio_vfcntl(int fd, int cmd, va_list ap) -{ - int err; - int rtn; - struct file *fil; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = 0; - fil = _sysio_fd_find(fd); - if (!fil) { -#ifdef HAVE_LUSTRE_HACK - err = _sysio_lustre_fcntl(fd, cmd, ap, &rtn); - goto out; -#else - rtn = -1; - err = -EBADF; - goto out; -#endif - } - - switch (cmd) { - - case F_DUPFD: - { - long newfd; - - newfd = va_arg(ap, long); - if (newfd != (int )newfd || newfd < 0) { - rtn = -1; - err = -EBADF; - goto out; - } - rtn = _sysio_fd_dup(fd, (int )newfd, 0); - if (rtn < 0) { - err = rtn; - rtn = -1; - } - } - break; -#if !(defined(_LARGEFILE64_SOURCE) || F_GETLK64 == F_GETLK) - case F_GETLK: - case F_SETLK: - case F_SETLKW: - { - struct intnl_stat buf; - struct flock *fl; -#ifdef _LARGEFILE64_SOURCE - struct _SYSIO_FLOCK flock64; -#endif - - /* - * Refresh the cached attributes. - */ - err = - fil->f_ino->i_ops.inop_getattr(NULL, - fil->f_ino, - &buf); - if (err) { - rtn = -1; - break; - } - /* - * Copy args to a temp and normalize. 
- */ - fl = va_arg(ap, struct flock *); -#ifdef _LARGEFILE64_SOURCE - flock64.l_type = fl->l_type; - flock64.l_whence = fl->l_whence; - flock64.l_start = fl->l_start; - flock64.l_len = fl->l_len; - flock64.l_pid = fl->l_pid; - err = _sysio_fcntl_lock(fil, cmd, &flock64); -#else - err = _sysio_fcntl_lock(fil, cmd, fl); -#endif - if (err < 0) { - rtn = -1; - break; - } -#ifdef _LARGEFILE64_SOURCE - /* - * Copy back. Note that the fcntl_lock call - * should have ensured that no overflow was possible. - */ - fl->l_type = flock64.l_type; - fl->l_whence = flock64.l_whence; - fl->l_start = flock64.l_start; - assert(fl->l_start == flock64.l_start); - fl->l_len = flock64.l_len; - assert(fl->l_len == flock64.l_len); - fl->l_pid = flock64.l_pid; -#endif - rtn = 0; - } - break; -#endif /* !(_LARGEFILE64_SOURCE || F_GETLK64 == F_GETLK) */ -#ifdef _LARGEFILE64_SOURCE - case F_GETLK64: - case F_SETLK64: - case F_SETLKW64: - { - struct flock64 *fl64; - - fl64 = va_arg(ap, struct flock64 *); - err = _sysio_fcntl_lock(fil, cmd, fl64); - rtn = err ? -1 : 0; - } - break; -#endif - default: - err = fil->f_ino->i_ops.inop_fcntl(fil->f_ino, cmd, ap, &rtn); - break; - } - -out: - SYSIO_INTERFACE_RETURN(rtn, err); -} - -int -SYSIO_INTERFACE_NAME(fcntl)(int fd, int cmd, ...) 
-{ - va_list ap; - int err; - - va_start(ap, cmd); - err = _sysio_vfcntl(fd, cmd, ap); - va_end(ap); - return err; -} - -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(fcntl), - SYSIO_INTERFACE_NAME(fcntl64)) - -#ifdef __GLIBC__ -#undef __fcntl -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(fcntl), - PREPEND(__, SYSIO_INTERFACE_NAME(fcntl))) -#endif - -#ifdef BSD -#undef _fcntl -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(fcntl), - PREPEND(_, SYSIO_INTERFACE_NAME(fcntl))) -#endif diff --git a/libsysio/src/file.c b/libsysio/src/file.c deleted file mode 100644 index 9ed054ed27f19343603c5baff7fe675da8a84c09..0000000000000000000000000000000000000000 --- a/libsysio/src/file.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <assert.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "file.h" -#include "inode.h" - -/* - * Support for file IO. - */ - -/* - * The open files table and it's size. - */ -static struct file **_sysio_oftab = NULL; -static size_t _sysio_oftab_size = 0; - -/* - * Create and initialize open file record. - */ -struct file * -_sysio_fnew(struct inode *ino, int flags) -{ - struct file *fil; - - fil = malloc(sizeof(struct file)); - if (!fil) - return NULL; - - _SYSIO_FINIT(fil, ino, flags); - F_REF(fil); - I_REF(fil->f_ino); - - return fil; -} - -/* - * Destroy open file record. - */ -void -_sysio_fgone(struct file *fil) -{ - int err; - - assert(!fil->f_ref); - assert(fil->f_ino); - err = (*fil->f_ino->i_ops.inop_close)(fil->f_ino); - assert(!err); - I_RELE(fil->f_ino); - free(fil); -} - -/* - * IO operation completion handler. - */ -void -_sysio_fcompletio(struct ioctx *ioctx, struct file *fil) -{ - _SYSIO_OFF_T off; - - if (ioctx->ioctx_cc <= 0) - return; - - assert(ioctx->ioctx_ino == fil->f_ino); - off = fil->f_pos + ioctx->ioctx_cc; - if (fil->f_pos && off <= fil->f_pos) - abort(); - fil->f_pos = off; -} - -/* - * Grow (or truncate) the file descriptor table. - */ -static int -fd_grow(size_t n) -{ - size_t count; - struct file **noftab, **filp; - - /* - * Sanity check the new size. - */ - if ((int )n < 0) - return -EMFILE; - - /* - * We never shrink the table. 
- */ - if (n <= _sysio_oftab_size) - return 0; - - noftab = realloc(_sysio_oftab, n * sizeof(struct file *)); - if (!noftab) - return -ENOMEM; - _sysio_oftab = noftab; - count = _sysio_oftab_size; - _sysio_oftab_size = n; - filp = _sysio_oftab + count; - n -= count; - while (n--) - *filp++ = NULL; - return 0; -} - -#ifdef ZERO_SUM_MEMORY -void -_sysio_fd_shutdown() -{ - - free(_sysio_oftab); - _sysio_oftab_size = 0; -} -#endif - -/* - * Find a free slot in the open files table greater than or equal to the - * argument. - */ -static int -find_free_fildes(int low) -{ - int n; - int err; - struct file **filp; - - for (n = low, filp = _sysio_oftab + low; - n >= 0 && (unsigned )n < _sysio_oftab_size && *filp; - n++, filp++) - ; - if (n < 0) - return -ENFILE; - if ((unsigned )n >= _sysio_oftab_size) { - err = fd_grow((unsigned )n + 1); - if (err) - return err; - filp = &_sysio_oftab[n]; - assert(!*filp); - } - - return n; -} - -/* - * Find open file record from file descriptor. - */ -struct file * -_sysio_fd_find(int fd) -{ - if (fd < 0 || (unsigned )fd >= _sysio_oftab_size) - return NULL; - - return _sysio_oftab[fd]; -} - -/* - * Close an open descriptor. - */ -int -_sysio_fd_close(int fd) -{ - struct file *fil; - - fil = _sysio_fd_find(fd); - if (!fil) - return -EBADF; - - _sysio_oftab[fd] = NULL; - - F_RELE(fil); - - return 0; -} - -/* - * Associate open file record with given file descriptor (if forced), or any - * available file descriptor if less than zero, or any available descriptor - * greater than or equal to the given one if not forced. - */ -int -_sysio_fd_set(struct file *fil, int fd, int force) -{ - int err; - struct file *ofil; - - /* - * Search for a free descriptor if needed. - */ - if (fd < 0 || !force) { - if (fd < 0) - fd = 0; - fd = find_free_fildes(fd); - if (fd < 0) - return fd; - } - - if ((unsigned )fd >= _sysio_oftab_size) { - err = fd_grow((unsigned )fd + 1); - if (err) - return err; - } - - /* - * Remember old. 
- */ - ofil = _sysio_fd_find(fd); - /* - * Take the entry. - */ - _sysio_oftab[fd] = fil; - if (ofil) - F_RELE(ofil); - - return fd; -} - -/* - * Duplicate old file descriptor. - * - * If the new file descriptor is less than zero, the new file descriptor - * is chosen freely. Otherwise, choose an available descriptor greater - * than or equal to the new, if not forced. Otherwise, if forced, (re)use - * the new. - */ -int -_sysio_fd_dup(int oldfd, int newfd, int force) -{ - struct file *fil; - int fd; - - if (oldfd == newfd && oldfd >= 0) - return newfd; - - fil = _sysio_fd_find(oldfd); - if (!fil) - return -EBADF; - - fd = _sysio_fd_set(fil, newfd, force); - if (fd >= 0) - F_REF(fil); - return fd; -} - -int -_sysio_fd_close_all() -{ - int fd; - struct file **filp; - - /* - * Close all open descriptors. - */ - for (fd = 0, filp = _sysio_oftab; - (size_t )fd < _sysio_oftab_size; - fd++, filp++) { - if (!*filp) - continue; - F_RELE(*filp); - *filp = NULL; - } - - /* - * Release current working directory. - */ - if (_sysio_cwd) { - P_RELE(_sysio_cwd); - _sysio_cwd = NULL; - } - - return 0; -} diff --git a/libsysio/src/file_hack.c b/libsysio/src/file_hack.c deleted file mode 100644 index 2eb6105909800a8ee96c21d47ffe60b2281374c8..0000000000000000000000000000000000000000 --- a/libsysio/src/file_hack.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. 
- * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <stdlib.h> -#include <assert.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "file.h" -#include "inode.h" - -/* - * Support for file IO. 
- */ - -/* - * The open files table - */ -typedef struct oftab { - struct file **table; /* table array */ - size_t size; /* current table size */ - int offset; /* base fd number */ - int max; /* max size */ -} oftab_t; - -#define OFTAB_NATIVE (0) -#define OFTAB_VIRTUAL (1) - -static oftab_t _sysio_oftab[2] = { - {NULL, 0, 0, 0}, - {NULL, 0, 0, 1024*1024}, -}; - -static int native_max_fds = 0; - -static inline void init_oftab() -{ - if (!native_max_fds) { - native_max_fds = sysconf(_SC_OPEN_MAX); - if (native_max_fds <= 0) - abort(); - _sysio_oftab[OFTAB_NATIVE].max = native_max_fds - 1; - _sysio_oftab[OFTAB_VIRTUAL].offset = native_max_fds; - } -} - -static inline oftab_t *select_oftab(int fd) -{ - return & _sysio_oftab[fd >= native_max_fds || fd < 0]; -} - -/* - * Create and initialize open file record. - */ -struct file * -_sysio_fnew(struct inode *ino, int flags) -{ - struct file *fil; - - fil = malloc(sizeof(struct file)); - if (!fil) - return NULL; - - _SYSIO_FINIT(fil, ino, flags); - F_REF(fil); - I_REF(ino); - - return fil; -} - -/* - * Destroy open file record. - */ -void -_sysio_fgone(struct file *fil) -{ - int err; - - assert(!fil->f_ref); - assert(fil->f_ino); - err = (*fil->f_ino->i_ops.inop_close)(fil->f_ino); - I_RELE(fil->f_ino); - assert(!err); - free(fil); -} - -/* - * IO operation completion handler. - */ -void -_sysio_fcompletio(struct ioctx *ioctx, struct file *fil) -{ - _SYSIO_OFF_T off; - - if (ioctx->ioctx_cc <= 0) - return; - - assert(ioctx->ioctx_ino == fil->f_ino); - off = fil->f_pos + ioctx->ioctx_cc; - if (fil->f_pos && off <= fil->f_pos) - abort(); - fil->f_pos = off; -} - -/* - * Grow (or truncate) the file descriptor table. - */ -static int -fd_grow(oftab_t *oftab, size_t n) -{ - int fd; - size_t count; - struct file **noftab, **filp; - - /* - * Sanity check the new size. 
- */ - fd = (int )n; - if ((size_t )fd != n) - return -EMFILE; - - n++; /* index -> size */ - assert(n > oftab->size); - - if (n > oftab->max) - return -ERANGE; - - if (n < 8) - n = 8; - if (n - oftab->size < oftab->size) - n = (n + 1) * 2; - noftab = realloc(oftab->table, n * sizeof(struct file *)); - if (!noftab) - return -ENOMEM; - oftab->table = noftab; - count = oftab->size; - oftab->size = n; - if (n < count) - return 0; - filp = oftab->table + count; - n -= count; - while (n--) - *filp++ = NULL; - return 0; -} - -#ifdef ZERO_SUM_MEMORY -static void free_oftab(oftab_t *ot) -{ - if (ot->table) { - free(ot->table); - ot->size = 0; - } -} - -void -_sysio_fd_shutdown() -{ - free_oftab(&_sysio_oftab[OFTAB_NATIVE]); - free_oftab(&_sysio_oftab[OFTAB_VIRTUAL]); -} -#endif - -/* - * Find a free slot in the open files table which >= @low - * low < 0 means any - */ -static int -find_free_fildes(oftab_t *oftab, int low) - { - int n; - int err; - struct file **filp; - - if (low < 0) - low = oftab->offset; - - n = low - oftab->offset; - if (n < 0) - return -ENFILE; - - for (filp = oftab->table + n; - n < oftab->size && *filp; - n++, filp++) - ; - - if (n >= oftab->size) { - err = fd_grow(oftab, n); - if (err) - return err; - filp = &oftab->table[n]; - assert(!*filp); - } - - return oftab->offset + n; -} - -/* - * Find open file record from file descriptor. - * clear this entry if 'clear' is non-zero - */ -static struct file * -__sysio_fd_get(int fd, int clear) -{ - oftab_t *oftab; - struct file *file; - - init_oftab(); - - if (fd < 0) - return NULL; - - oftab = select_oftab(fd); - if (!oftab->table || fd >= oftab->offset + oftab->size) - return NULL; - - file = oftab->table[fd - oftab->offset]; - if (clear) - oftab->table[fd - oftab->offset] = NULL; - - return file; -} - -/* - * Find open file record from file descriptor. - */ -struct file * -_sysio_fd_find(int fd) -{ - return __sysio_fd_get(fd, 0); -} - -/* - * Close an open descriptor. 
- */ -int -_sysio_fd_close(int fd) -{ - struct file *fil; - - fil = fil = __sysio_fd_get(fd, 1); - if (!fil) - return -EBADF; - - F_RELE(fil); - - return 0; -} - -/* - * Associate open file record with given file descriptor (if forced), or any - * available file descriptor if less than zero, or any available descriptor - * greater than or equal to the given one if not forced. - */ -int -_sysio_fd_set(struct file *fil, int fd, int force) -{ - int err; - struct file *ofil; - oftab_t *oftab; - - if (force && fd < 0) - abort(); - - init_oftab(); - - oftab = select_oftab(fd); - - /* - * Search for a free descriptor if needed. - */ - if (!force) { - fd = find_free_fildes(oftab, fd); - if (fd < 0) - return fd; - } - - if (fd - oftab->offset >= oftab->size) { - err = fd_grow(oftab, fd - oftab->offset); - if (err) - return err; - } - - /* - * Remember old. - */ - ofil = __sysio_fd_get(fd, 1); - if (ofil) { - /* FIXME sometimes we could intercept open/socket to create - * a fd, but missing close()? currently we have this problem - * with resolv lib. as a workaround simply destroy the file - * struct here. And this hack will break the behavior of - * DUPFD. - */ - if (fd >= 0 && oftab == &_sysio_oftab[0]) - free(ofil); - else - F_RELE(ofil); - } - - oftab->table[fd - oftab->offset] = fil; - - return fd; -} - -/* - * Duplicate old file descriptor. - * - * If the new file descriptor is less than zero, the new file descriptor - * is chosen freely. Otherwise, choose an available descriptor greater - * than or equal to the new, if not forced. Otherwise, if forced, (re)use - * the new. 
- */ -int -_sysio_fd_dup(int oldfd, int newfd, int force) -{ - struct file *fil; - int fd; - - init_oftab(); - - if (oldfd == newfd && oldfd >= 0) - return newfd; - - fil = _sysio_fd_find(oldfd); - if (!fil) - return -EBADF; - - /* old & new must belong to the same oftab */ - if (select_oftab(oldfd) != select_oftab(newfd)) - return -EINVAL; - - fd = _sysio_fd_set(fil, newfd, force); - if (fd >= 0) - F_REF(fil); - return fd; -} - -void -_sysio_oftable_close_all(oftab_t *oftab) -{ - struct file **filp; - int fd; - - for (fd = 0, filp = oftab->table; - (size_t )fd < oftab->size; - fd++, filp++) { - if (!*filp) - continue; - F_RELE(*filp); - *filp = NULL; - } -} - -int -_sysio_fd_close_all() -{ - int fd; - struct file **filp; - oftab_t *oftab; - int i; - - /* - * Close all open descriptors. - */ - _sysio_oftable_close_all(&_sysio_oftab[OFTAB_VIRTUAL]); - /* XXX see liblustre/llite_lib.c for explaination */ -#if 0 - _sysio_oftable_close_all(&_sysio_oftab[OFTAB_NATIVE]); -#endif - - /* - * Release current working directory. - */ - if (_sysio_cwd) { - P_RELE(_sysio_cwd); - _sysio_cwd = NULL; - } - - return 0; -} diff --git a/libsysio/src/fs.c b/libsysio/src/fs.c deleted file mode 100644 index 0c9bd52f1c44a7a39e009d0e35ba03310d254029..0000000000000000000000000000000000000000 --- a/libsysio/src/fs.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. 
- * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "fs.h" -#include "inode.h" - -/* - * File system abstractipon support. - */ - -/* - * The "file system switch". - */ -static LIST_HEAD(, fsswent) fsswitch = { NULL }; - -/* - * Lookup named entry in the switch. - */ -struct fsswent * -_sysio_fssw_lookup(const char *name) -{ - struct fsswent *fssw; - - if (!fsswitch.lh_first) - return NULL; - - fssw = fsswitch.lh_first; - do { - if (strcmp(fssw->fssw_name, name) == 0) - return fssw; - fssw = fssw->fssw_link.le_next; - } while (fssw); - return NULL; -} - -/* - * Register driver. 
- */ -int -_sysio_fssw_register(const char *name, struct fssw_ops *ops) -{ - struct fsswent *fssw; - - fssw = _sysio_fssw_lookup(name); - if (fssw) - return -EEXIST; - - fssw = malloc(sizeof(struct fsswent) + strlen(name) + 1); - if (!fssw) - return -ENOMEM; - fssw->fssw_name = (char *)fssw + sizeof(struct fsswent); - (void )strcpy((char *)fssw->fssw_name, name); - fssw->fssw_ops = *ops; - - LIST_INSERT_HEAD(&fsswitch, fssw, fssw_link); - - return 0; -} - -#ifdef ZERO_SUM_MEMORY -/* - * Shutdown - */ -void -_sysio_fssw_shutdown() -{ - struct fsswent *fssw; - - while ((fssw = fsswitch.lh_first)) { - LIST_REMOVE(fssw, fssw_link); - free(fssw); - } -} -#endif - -/* - * Allocate and initialize a new file system record. - */ -struct filesys * -_sysio_fs_new(struct filesys_ops *ops, unsigned flags, void *private) -{ - struct filesys *fs; - - fs = malloc(sizeof(struct filesys)); - if (!fs) - return NULL; - FS_INIT(fs, flags, ops, private); - return fs; -} - -/* - * Dispose of given file system record. - */ -void -_sysio_fs_gone(struct filesys *fs) -{ - size_t n; - struct itable_entry *head; - - if (fs->fs_ref) - abort(); - n = FS_ITBLSIZ; - do { - head = &fs->fs_itbl[--n]; - while (head->lh_first) - _sysio_i_gone(head->lh_first); - } while (n); - if (n) - abort(); - - (*fs->fs_ops.fsop_gone)(fs); - free(fs); -} diff --git a/libsysio/src/fsync.c b/libsysio/src/fsync.c deleted file mode 100644 index dda9904faacee3420150f71e210b0b3c60342e09..0000000000000000000000000000000000000000 --- a/libsysio/src/fsync.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. 
- * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "file.h" -#include "inode.h" - -int -SYSIO_INTERFACE_NAME(fsync)(int fd) -{ - struct file *fil; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!(fil && fil->f_ino)) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - err = (*fil->f_ino->i_ops.inop_sync)(fil->f_ino); - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -int -SYSIO_INTERFACE_NAME(fdatasync)(int fd) -{ - struct file *fil; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!(fil && fil->f_ino)) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - err = (*fil->f_ino->i_ops.inop_datasync)(fil->f_ino); - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} diff --git a/libsysio/src/getdirentries.c b/libsysio/src/getdirentries.c deleted file mode 100644 index 151829d198e53606419ec1407eb2f2dc4fc3f042..0000000000000000000000000000000000000000 --- a/libsysio/src/getdirentries.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <stdlib.h> -#ifdef __GLIBC__ -#include <alloca.h> -#endif -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <dirent.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" -#include "sysio-symbols.h" - -#ifndef __GNUC__ -#define __restrict -#endif - -static ssize_t -filldirents(struct file *fil, - char *buf, size_t nbytes, - _SYSIO_OFF_T *__restrict basep) -{ - _SYSIO_OFF_T opos; - ssize_t cc; - - if (!S_ISDIR(fil->f_ino->i_stbuf.st_mode)) - return -ENOTDIR; - - opos = fil->f_pos; - cc = - (*fil->f_ino->i_ops.inop_filldirentries)(fil->f_ino, - &fil->f_pos, - buf, nbytes); - if (cc < 0) - return cc; - *basep = opos; - return cc; -} - -static ssize_t -PREPEND(_, SYSIO_INTERFACE_NAME(getdirentries64))(int fd, - char *buf, - size_t nbytes, - _SYSIO_OFF_T * __restrict - basep) -{ - struct file *fil; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - - fil = _sysio_fd_find(fd); - if (!(fil && fil->f_ino)) { - SYSIO_INTERFACE_RETURN(-1, -EBADF); - } - - cc = filldirents(fil, buf, nbytes, basep); - SYSIO_INTERFACE_RETURN(cc < 0 ? -1 : cc, cc < 0 ? 
(int )cc : 0); -} - -#ifdef _LARGEFILE64_SOURCE -#undef getdirentries64 -sysio_sym_strong_alias(PREPEND(_, SYSIO_INTERFACE_NAME(getdirentries64)), - SYSIO_INTERFACE_NAME(getdirentries64)) -#endif - -#undef getdirentries - -#ifndef DIRENT64_IS_NATURAL - -#ifndef EOVERFLOW -#define EOVERFLOW ERANGE -#endif - -#ifdef _DIRENT_HAVE_D_NAMLEN -#define _namlen(dp) ((dp)->d_namlen) -#else -#define _namlen(dp) (strlen((dp)->d_name)) -#endif - -#ifndef _rndup -#define _rndup(n, boundary) \ - ((((n) + (boundary) - 1 ) / (boundary)) * (boundary)) -#endif - -#define _dbaselen ((size_t )&((struct dirent *)0)->d_name[0]) - -#ifdef __GLIBC__ -#define _dreclen(namlen) \ - ((_dbaselen + (namlen) + __alignof__ (struct dirent)) & \ - ~(__alignof__ (struct dirent) - 1)) -#else /* !defined(__GLIBC__) */ -#define _dreclen(namlen) \ - _rndup(_dbaselen + (namlen) + 1, sizeof(int)) -#endif - -#ifndef BSD -ssize_t -SYSIO_INTERFACE_NAME(getdirentries)(int fd, - char *buf, - size_t nbytes, - off_t * __restrict basep) -#else -int -SYSIO_INTERFACE_NAME(getdirentries)(int fd, - char *buf, - int nbytes, - long * __restrict basep) -#endif -{ - struct file *fil; - _SYSIO_OFF_T b; - ssize_t cc, count; - struct dirent64 *d64p, d64; - struct dirent *dp; - size_t n, reclen; - void *p; - char *cp; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - - fil = _sysio_fd_find(fd); - if (!(fil && fil->f_ino)) { - SYSIO_INTERFACE_RETURN(-1, -EBADF); - } - - count = cc = filldirents(fil, buf, nbytes, &b); - d64p = (void *)buf; - dp = (void *)buf; - reclen = 0; - while (cc > 0) { - n = _namlen(d64p); - reclen = _dreclen(n); - d64.d_ino = d64p->d_ino; - d64.d_off = d64p->d_off; - d64.d_type = d64p->d_type; - d64.d_reclen = d64p->d_reclen; - /* - * Copy name first. - */ - (void )memcpy(dp->d_name, d64p->d_name, n); - /* - * Then, the rest. - */ - dp->d_ino = d64.d_ino; - dp->d_off = d64.d_off; - if (dp->d_ino != d64.d_ino || - dp->d_off != d64.d_off) { - /* - * If conversion failure then we are done. 
- */ - if (cc == count) { - /* - * Couldn't process any entries. We return - * the error now. - */ - cc = - EOVERFLOW; - } - break; - } - fil->f_pos = dp->d_off; - dp->d_type = d64.d_type; - dp->d_reclen = reclen; - /* - * Fill the remainder with zeros. - */ - p = (char *)dp + dp->d_reclen; -#ifdef HAVE_D_NAMLEN - dp->d_namlen = n; -#endif - cp = dp->d_name + n; - do { - *cp++ = 0; - } while (cp < (char *)p); - /* - * Advance. - */ - dp = p; - cc -= d64.d_reclen; - d64p = (struct dirent64 *)((char *)d64p + d64.d_reclen); - } - - if (cc < 0) - SYSIO_INTERFACE_RETURN(-1, cc); - cc = (char *)dp - buf; - *basep = b; - SYSIO_INTERFACE_RETURN(cc, 0); -} -#else /* !defined(DIRENT64_IS_NATURAL) */ -sysio_sym_strong_alias(PREPEND(_, SYSIO_INTERFACE_NAME(getdirentries64), - SYSIO_INTERFACE_NAME(getdirentries))) -#endif - -#ifdef REDSTORM -#undef __getdirentries -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(getdirentries), - PREPEND(__, SYSIO_INTERFACE_NAME(getdirentries))) -#endif -#if defined(BSD) || defined(REDSTORM) -#undef _getdirentries -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(getdirentries), - PREPEND(_, SYSIO_INTERFACE_NAME(getdirentries))) -#endif diff --git a/libsysio/src/init.c b/libsysio/src/init.c deleted file mode 100644 index 470b5c2ac8c81ed960f2cd98d67de512d314643b..0000000000000000000000000000000000000000 --- a/libsysio/src/init.c +++ /dev/null @@ -1,1102 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. 
- * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifdef __linux__ -#define _BSD_SOURCE -#endif - -#ifdef SYSIO_TRACING -#include <stdio.h> -#endif -#include <stdlib.h> -#if defined(_BSD_SOURCE) || defined(SYSIO_TRACING) -#include <sys/syscall.h> -#endif -#include <unistd.h> -#include <string.h> -#include <errno.h> -#ifdef SYSIO_TRACING -#include <stdarg.h> -#endif -#include <limits.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/uio.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" -#ifdef SYSIO_TRACING -#include "native.h" -#endif -#include "inode.h" -#include "fs.h" -#include "mount.h" -#include "file.h" -#include "dev.h" - -#ifdef STDFD_DEV -#include "stdfd.h" -#endif - -#ifdef SYSIO_TRACING - -/* - * Tracing callback record. 
- */ -struct trace_callback { - TAILQ_ENTRY(trace_callback) links; /* trace list links */ - void (*f)(const char *file, /* callback function */ - const char *func, - int line, - void *data); - void *data; /* callback data */ - void (*destructor)(void *data); /* data destructor */ -}; - -/* - * Initialize a tracing callback record. - */ -#define TCB_INIT(__tcb, __f, __d, __destroy) \ - do { \ - (__tcb)->f = (__f); \ - (__tcb)->data = (__d); \ - (__tcb)->destructor = (__destroy); \ - } while (0); - -/* - * Trace queue head record. - */ -TAILQ_HEAD(trace_q, trace_callback); - -/* - * The entry and exit queue heads, and queue pointers. - */ -static struct trace_q _sysio_entry_trace_head; -void *_sysio_entry_trace_q = &_sysio_entry_trace_head; -static struct trace_q _sysio_exit_trace_head; -void *_sysio_exit_trace_q = &_sysio_exit_trace_head; -#endif - -/* - * White space characters. - */ -#define IGNORE_WHITE " \t\r\n" - -/* - * Check if long overflows integer range. - */ -#if LONG_MAX <= INT_MAX -#define _irecheck(_l, _e) \ - ((_l) == LONG_MAX && (_e) == ERANGE) -#else -#define _irecheck(_l, _e) \ - ((_l) > INT_MAX) -#endif - -/* - * In sysio_init we'll allow simple comments, strings outside {} - * delimited by COMMENT_INTRO, and '\n' or '\0' - */ -#define COMMENT_INTRO '#' - -/* - * In sysio_init we'll allow simple comments, strings outside {} - * delimited by COMMENT_INTRO, and '\n' or '\0' - */ -#define COMMENT_INTRO '#' - -/* - * Sysio library initialization. Must be called before anything else in the - * library. - */ -int -_sysio_init() -{ - int err; -#ifdef WITH_SOCKETS - extern int _sysio_sockets_init(void); -#endif - -#ifdef SYSIO_TRACING - /* - * Initialize tracing callback queues. 
- */ - TAILQ_INIT(&_sysio_entry_trace_head); - TAILQ_INIT(&_sysio_exit_trace_head); -#endif - - err = _sysio_ioctx_init(); - if (err) - goto error; - err = _sysio_i_init(); - if (err) - goto error; - err = _sysio_mount_init(); - if (err) - goto error; - - err = _sysio_dev_init(); - if (err) - goto error; -#ifdef STDFD_DEV - err = _sysio_stdfd_init(); - if (err) - goto error; -#endif -#ifdef WITH_SOCKETS - err = _sysio_sockets_init(); - if (err) - goto error; -#endif - - goto out; -error: - errno = -err; -out: - /* - * Unlike all other _sysio routines, this one returns with errno - * set. It also returns the error, as usual. - */ - return err; -} - -/* - * Sysio library shutdown. - */ -void -_sysio_shutdown() -{ - - if (!(_sysio_fd_close_all() == 0 && - _sysio_unmount_all() == 0)) - abort(); - -#ifdef ZERO_SUM_MEMORY - _sysio_fd_shutdown(); - _sysio_i_shutdown(); - _sysio_fssw_shutdown(); - _sysio_access_shutdown(); -#ifdef SYSIO_TRACING - { - struct trace_callback *tcb; - - /* - * Empty the trace queues and free the entries. - */ - while ((tcb = _sysio_entry_trace_head.tqh_first) != NULL) - _sysio_remove_trace(&_sysio_entry_trace_head, tcb); - while ((tcb = _sysio_exit_trace_head.tqh_first) != NULL) - _sysio_remove_trace(&_sysio_exit_trace_head, tcb); - } -#endif -#endif -} - -#ifdef SYSIO_TRACING - -#if !(defined(_HAVE_ASPRINTF) && _HAVE_ASPRINTF) -/* - * Print a string to allocated memory. 
- */ -static int -vasprintf(char **strp, const char *fmt, va_list ap) -{ - size_t siz; - int oerrno; - char *s; - va_list aq; - int n; - - siz = 50; - oerrno = errno; - if (!(s = malloc(siz))) { - errno = oerrno; - return -1; - } - for (;;) { - va_copy(aq, ap); - n = vsnprintf (s, siz, fmt, aq); - va_end(aq); - if (n > -1 && (size_t )n < siz) - break; - if (n > -1) /* glibc 2.1 */ - siz = n+1; /* precise */ - else /* glibc 2.0 */ - siz *= 2; /* twice the old */ - if (!(s = realloc (s, siz))) - break; - } - *strp = s; - errno = oerrno; - return n; -} - -#if 0 -static int -asprintf(char **strp, const char *fmt, ...) -{ - va_list ap; - int n; - - va_start(ap, fmt); - n = vasprintf(strp, fmt, ap); - va_end(ap); - return n; -} -#endif -#endif /* !(defined(_HAVE_ASPRINTF) && _HAVE_ASPRINTF) */ - -static void -_sysio_cwrite(const char *buf, size_t len) -{ - int oerrno; - - oerrno = errno; - (void )syscall(SYSIO_SYS_write, STDERR_FILENO, buf, len); - errno = oerrno; -} - -/* - * Console printf. - */ -void -_sysio_cprintf(const char *fmt, ...) -{ - va_list ap; - int len; - char *buf; - - va_start(ap, fmt); - buf = NULL; - len = vasprintf(&buf, fmt, ap); - va_end(ap); - if (len < 0) - return; - _sysio_cwrite(buf, len); - free(buf); -} - -/* - * Register a trace callback. - * - * The pointer to the trace record is returned. - */ -void * -_sysio_register_trace(void *q, - void (*f)(const char *file, - const char *func, - int line, - void *data), - void *data, - void (*destructor)(void *data)) -{ - struct trace_callback *tcb; - - tcb = malloc(sizeof(struct trace_callback)); - if (!tcb) - return NULL; - TCB_INIT(tcb, f, data, destructor); - TAILQ_INSERT_TAIL((struct trace_q *)q, tcb, links); - return tcb; -} - -/* - * Remove a registered trace callback. 
- */ -void -_sysio_remove_trace(void *q, void *p) -{ - struct trace_callback *tcb; - - tcb = (struct trace_callback *)p; - - if (tcb->destructor) - (*tcb->destructor)(tcb->data); - TAILQ_REMOVE((struct trace_q *)q, tcb, links); - free(tcb); -} - -void -/* - * Run a trace queue, making all the callbacks. - */ -_sysio_run_trace_q(void *q, - const char *file, - const char *func, - int line) -{ - struct trace_callback *tcb; - - tcb = ((struct trace_q *)q)->tqh_first; - while (tcb) { - (*tcb->f)(file, func, line, tcb->data); - tcb = tcb->links.tqe_next; - } -} - -static void -_sysio_trace_entry(const char *file __IS_UNUSED, - const char *func, - int line __IS_UNUSED, - void *data __IS_UNUSED) -{ - - _sysio_cprintf("+ENTER+ %s\n", func); -} - -static void -_sysio_trace_exit(const char *file __IS_UNUSED, - const char *func, - int line __IS_UNUSED, - void *data __IS_UNUSED) -{ - - _sysio_cprintf("+EXIT+ %s\n", func); -} -#endif /* defined(SYSIO_TRACING) */ - -/* - * (kind of)Duplicates strtok function. - * - * Given a buffer, returns the longest string - * that does not contain any delim characters. Will - * remove ws and any characters in the ignore string. - * Returns the token. - * - * The parameter controlling acceptance controls whether a positive - * match for some delimiter be made or not. If set, then either a delimiter - * or NUL character is success. - * - */ -const char * -_sysio_get_token(const char *buf, - int accepts, - const char *delim, - const char *ignore, - char *tbuf) -{ - char c; - int escape, quote; - - /* - * Find the first occurance of delim, recording how many - * characters lead up to it. Ignore indicated characters. 
- */ - escape = quote = 0; - while ((c = *buf) != '\0') { - buf++; - if (!escape) { - if (c == '\\') { - escape = 1; - continue; - } - if (c == '\"') { - quote ^= 1; - continue; - } - if (!quote) { - if (strchr(delim, c) != NULL) { - accepts = 1; - break; - } - if (strchr(ignore, c) != NULL) - continue; - } - } else - escape = 0; - *tbuf++ = c; - } - if (!accepts) - return NULL; - *tbuf = '\0'; /* NUL term */ - return buf; -} - -/* - * Parse and record named arguments given as `name = value', comma-separated - * pairs. - * - * NB: Alters the passed buffer. - */ -char * -_sysio_get_args(char *buf, struct option_value_info *vec) -{ - char *nxt; - char *name, *value; - struct option_value_info *v; - - for (;;) { - nxt = - (char *)_sysio_get_token(buf, - 1, - "=,", - IGNORE_WHITE, - name = buf); - if (!nxt || - (nxt != buf && *name == '\0' && buf + strlen(buf) == nxt)) { - buf = NULL; - break; - } - if (*name == '\0') - break; - buf = - (char *)_sysio_get_token(nxt, - 1, - ",", - IGNORE_WHITE, - value = nxt); - if (*value == '\0') - value = NULL; - for (v = vec; v->ovi_name; v++) - if (strcmp(v->ovi_name, name) == 0) - break; - if (!v->ovi_name) - return NULL; - v->ovi_value = value; - } - - return buf; -} - -static int -parse_mm(const char *s, dev_t *devp) -{ - unsigned long ul; - char *cp; - dev_t dev; - - ul = strtoul(s, &cp, 0); - if (*cp != '+' || ul > USHRT_MAX) - return -EINVAL; - dev = ul << 16; - s = (const char *)++cp; - ul = strtoul(s, &cp, 0); - if (*cp != '\0' || ul > USHRT_MAX) - return -EINVAL; - dev |= ul & 0xffff; - *devp = dev; - return 0; -} - -/* - * Performs the creat command for the namespace assembly - * - * NB: Alters the passed buffer. 
- */ -static int -do_creat(char *args) -{ - size_t len; - struct option_value_info v[] = { - { "ft", NULL }, /* file type */ - { "nm", NULL }, /* name */ - { "pm", NULL }, /* permissions */ - { "ow", NULL }, /* owner */ - { "gr", NULL }, /* group */ - { "mm", NULL }, /* major + minor */ - { "str", NULL }, /* file data */ - { NULL, NULL } - }; - const char *cp; - long perms; - long owner, group; - struct pnode *dir, *pno; - mode_t mode; - struct intent intent; - dev_t dev; - int err; - enum { - CREATE_DIR = 1, - CREATE_CHR = 2, - CREATE_BLK = 3, - CREATE_FILE = 4 - } op; - int intent_mode; - struct inode *ino; - int i; - - len = strlen(args); - if (_sysio_get_args(args, v) - args != (ssize_t )len || - !(v[0].ovi_value && - v[1].ovi_value && - v[2].ovi_value)) - return -EINVAL; - perms = strtol(v[2].ovi_value, (char **)&cp, 0); - if (*cp || - perms < 0 || - (perms == LONG_MAX && errno == ERANGE) || - ((unsigned)perms & ~07777)) - return -EINVAL; - if (v[3].ovi_value) { - owner = strtol(v[3].ovi_value, (char **)&cp, 0); - if (*cp || - ((owner == LONG_MIN || owner == LONG_MAX) - && errno == ERANGE)) - return -EINVAL; - } else - owner = getuid(); - if (v[4].ovi_value) { - group = strtol(v[4].ovi_value, (char **)&cp, 0); - if (*cp || - ((group == LONG_MIN || group == LONG_MAX) && - errno == ERANGE)) - return -EINVAL; - } else - group = getegid(); - - if (!(dir = _sysio_cwd) && !(dir = _sysio_root)) - return -ENOENT; - - /* - * Init, get the operation, setup the intent. 
- */ - err = 0; - mode = perms; - op = 0; - if (strcmp(v[0].ovi_value, "dir") == 0) { - op = CREATE_DIR; - INTENT_INIT(&intent, INT_CREAT, &mode, NULL); - } else if (strcmp(v[0].ovi_value, "chr") == 0) { - op = CREATE_CHR; - mode |= S_IFCHR; - INTENT_INIT(&intent, INT_CREAT, &mode, NULL); - if (!(v[5].ovi_value && parse_mm(v[5].ovi_value, &dev) == 0)) - err = -EINVAL; - } else if (strcmp(v[0].ovi_value, "blk") == 0) { - op = CREATE_BLK; - mode |= S_IFBLK; - INTENT_INIT(&intent, INT_CREAT, &mode, NULL); - if (!(v[5].ovi_value && parse_mm(v[5].ovi_value, &dev) == 0)) - err = -EINVAL; - } else if (strcmp(v[0].ovi_value, "file") == 0) { - op = CREATE_FILE; - intent_mode = O_CREAT|O_EXCL; - INTENT_INIT(&intent, INT_CREAT, &mode, &intent_mode); - } else - err = -EINVAL; - if (err) - return err; - - /* - * Lookup the given path. - */ - err = - _sysio_namei(dir, - v[1].ovi_value, - ND_NEGOK|ND_NOPERMCHECK, - &intent, - &pno); - if (err) - return err; - - /* - * Perform. - */ - switch (op) { - case CREATE_DIR: - err = _sysio_mkdir(pno, mode); - break; - case CREATE_CHR: - case CREATE_BLK: - err = _sysio_mknod(pno, mode, dev); - break; - case CREATE_FILE: - err = _sysio_open(pno, O_CREAT|O_EXCL, mode); - if (err) - break; - ino = pno->p_base->pb_ino; - if (v[6].ovi_value) { - struct iovec iovec; - struct intnl_xtvec xtvec; - struct ioctx io_context; - - /* - * Deposit optional file content. - */ - iovec.iov_base = v[6].ovi_value; - iovec.iov_len = strlen(v[6].ovi_value); - xtvec.xtv_off = 0; - xtvec.xtv_len = iovec.iov_len; - IOCTX_INIT(&io_context, - 1, - 1, - ino, - &iovec, 1, - &xtvec, 1); - _sysio_ioctx_enter(&io_context); - err = - (*ino->i_ops.inop_write)(pno->p_base->pb_ino, - &io_context); - if (!err) { - ssize_t cc; - - cc = _sysio_ioctx_wait(&io_context); - if (cc < 0) - err = cc; - else if ((size_t )cc != iovec.iov_len) - err = -EIO; /* huh? 
*/ - } else - _sysio_ioctx_complete(&io_context); - } - i = (*ino->i_ops.inop_close)(ino); - if (!err) - err = i; - break; - default: - abort(); - } - - P_RELE(pno); - return err; -} - -/* - * Do mount. - * - * NB: The passed buffer is altered. - */ -static int -do_mnt(char *args) -{ - size_t len; - struct option_value_info v[] = { - { "dev", NULL }, /* source (type:dev) */ - { "dir", NULL }, /* target dir */ - { "fl", NULL }, /* flags */ - { "da", NULL }, /* mount data */ - { NULL, NULL } - }; - char *ty, *name; - unsigned long flags; - struct pnode *dir; - - len = strlen(args); - if (_sysio_get_args(args, v) - args != (ssize_t )len || - !(v[0].ovi_value && v[1].ovi_value)) - return -EINVAL; - ty = - (char *)_sysio_get_token(v[0].ovi_value, - 1, - ":", - "", - name = v[0].ovi_value); - flags = 0; - if (v[2].ovi_value) { - char *cp; - - /* - * Optional flags. - */ - flags = strtoul(v[2].ovi_value, &cp, 0); - if (*cp || (flags == ULONG_MAX && errno == ERANGE)) - return -EINVAL; - } - - if (strlen(v[1].ovi_value) == 1 && v[1].ovi_value[0] == PATH_SEPARATOR) { - /* - * Aha! It's root they want. Have to do that special. - */ - return _sysio_mount_root(ty, name, flags, v[3].ovi_value); - } - - if (!(dir = _sysio_cwd) && !(dir = _sysio_root)) - return -ENOENT; - return _sysio_mount(dir, - ty, - v[1].ovi_value, - name, - flags, - v[3].ovi_value); -} - - -#if 0 -/* - * Chdir - * - * NB: Alters the passed buffer. - */ -static int -do_cd(char *args) -{ - size_t len; - struct option_value_info v[] = { - { "dir", NULL }, /* directory */ - { NULL, NULL } - }; - int err; - struct pnode *dir, *pno; - - len = strlen(args); - if (_sysio_get_args(args, v) - args != (ssize_t )len || !v[0].ovi_value) - return -EINVAL; - - if (!(dir = _sysio_cwd) && !(dir = _sysio_root)) { - /* - * We have no namespace yet. They really need to give us - * something to work with. 
- */ - return -ENOENT; - } - err = _sysio_namei(dir, v[0].ovi_value, 0, NULL, &pno); - if (err) - return err; - err = _sysio_p_chdir(pno); - if (err) - P_RELE(pno); - return err; -} -#endif - -/* - * Does a chmod - * - * NB: Alters passed buffer. - */ -static int -do_chmd(char *args) -{ - size_t len; - struct option_value_info v[] = { - { "src", NULL }, /* path */ - { "pm", NULL }, /* perms */ - { NULL, NULL } - }; - long perms; - char *cp; - struct intnl_stat stbuf; - int err; - struct pnode *dir, *pno; - - len = strlen(args); - if (_sysio_get_args(args, v) - args != (ssize_t )len || - !(v[0].ovi_value && v[1].ovi_value)) - return -EINVAL; - perms = strtol(v[1].ovi_value, &cp, 0); - if (*cp || - perms < 0 || - (perms == LONG_MAX && errno == ERANGE) || - ((unsigned)perms & ~07777)) - return -EINVAL; - (void )memset(&stbuf, 0, sizeof(stbuf)); - stbuf.st_mode = (mode_t)perms; - - if (!(dir = _sysio_cwd) && !(dir = _sysio_root)) - return -ENOENT; - err = _sysio_namei(dir, v[0].ovi_value, ND_NOPERMCHECK, NULL, &pno); - if (err) - return err; - err = _sysio_setattr(pno, pno->p_base->pb_ino, SETATTR_MODE, &stbuf); - P_RELE(pno); - - return err; -} - -static int -do_open(char *args) -{ - size_t len; - struct option_value_info v[] = { - { "nm", NULL }, /* path */ - { "fd", NULL }, /* fildes */ - { "m", NULL }, /* mode */ - { NULL, NULL } - }; - char *cp; - long l; - int fd; - unsigned long ul; - mode_t m; - struct pnode *dir, *pno; - struct intent intent; - int err; - struct file *fil; - - len = strlen(args); - if (_sysio_get_args(args, v) - args != (ssize_t )len || - !(v[0].ovi_value && v[1].ovi_value && v[2].ovi_value)) - return -EINVAL; - l = strtol(v[1].ovi_value, (char **)&cp, 0); - if (*cp || l < 0 || _irecheck(l, errno)) - return -EINVAL; - fd = (int )l; - ul = strtoul(v[1].ovi_value, (char **)&cp, 0); - if (*cp || - (ul == ULONG_MAX && errno == ERANGE)) - return -EINVAL; - m = (mode_t )ul & (O_RDONLY|O_WRONLY|O_RDWR); - - if (!(dir = _sysio_cwd) && !(dir = 
_sysio_root)) - return -ENOENT; - INTENT_INIT(&intent, INT_OPEN, &m, NULL); - pno = NULL; - err = _sysio_namei(dir, v[0].ovi_value, ND_NOPERMCHECK, &intent, &pno); - if (err) - return err; - fil = NULL; - do { - err = _sysio_open(pno, m, 0); - if (err) - break; - fil = _sysio_fnew(pno->p_base->pb_ino, m); - if (!fil) { - err = -ENOMEM; - break; - } - err = _sysio_fd_set(fil, fd, 1); - if (err < 0) - break; - P_RELE(pno); - return 0; - } while (0); - if (fil) - F_RELE(fil); - if (pno) - P_RELE(pno); - return err; -} - -/* - * Execute the given cmd. - * - * NB: Buf is altered. - */ -static int -do_command(char *buf) -{ - size_t len; - char *args, *cmd; - - len = strlen(buf); - args = (char *)_sysio_get_token(buf, 1, ",", IGNORE_WHITE, cmd = buf); - if (args) { - if (strcmp("creat", cmd) == 0) - return do_creat(args); - if (strcmp("mnt", cmd) == 0) - return do_mnt(args); -#if 0 - if (strcmp("cd", cmd) == 0) - return do_cd(args); -#endif - if (strcmp("chmd", cmd) == 0) - return do_chmd(args); - if (strcmp("open", cmd) == 0) - return do_open(args); - } - return -EINVAL; -} - -#ifdef SYSIO_TRACING -/* - * Set/Unset tracing. - */ -static int -_sysio_boot_tracing(const char *arg) -{ - long l; - char *cp; - static struct trace_callback - *entcb = NULL, - *exitcb = NULL; - - l = 0; - if (arg) { - l = strtol(arg, (char **)&cp, 0); - if (*cp || !(l == 0 || l == 1)) - return -EINVAL; - } - if (l) { - if (entcb == NULL) - entcb = - _sysio_register_trace(_sysio_entry_trace_q, - _sysio_trace_entry, - NULL, - NULL); - if (entcb == NULL) - return -errno; - if (exitcb == NULL) - exitcb = - _sysio_register_trace(_sysio_exit_trace_q, - _sysio_trace_exit, - NULL, - NULL); - if (exitcb == NULL) - return -errno; - } else { - if (entcb != NULL) - _sysio_remove_trace(_sysio_entry_trace_q, entcb); - entcb = NULL; - if (exitcb != NULL) - _sysio_remove_trace(_sysio_exit_trace_q, exitcb); - exitcb = NULL; - } - return 0; -} -#endif - -/* - * Initialize the namespace. 
- */ -static int -_sysio_boot_namespace(const char *arg) -{ - char c, *tok; - ssize_t len; - int err; - unsigned count; - /* - * Allocate token buffer. - */ - len = strlen(arg); - tok = malloc(len ? len : 1); - if (!tok) - return -ENOMEM; - err = 0; - count = 0; - while (1) { - /* - * Discard leading white space. - */ - while ((c = *arg) != '\0' && strchr(IGNORE_WHITE, c)) - arg++; - if (COMMENT_INTRO == c) { - /* - * Discard comment. - */ - while (*arg && (*arg != '\n')) { - ++arg; - } - continue; - } - - if (c == '\0') - break; - if (c != '{') { - err = -EINVAL; - break; - } - /* - * Get the command. - */ - *tok = '\0'; - arg = - (char *)_sysio_get_token(arg + 1, - 0, - "}", - IGNORE_WHITE, - tok); - if (!arg) { - err = -EINVAL; - break; - } - count++; - /* - * Perform. - */ - err = do_command(tok); - if (err) - break; - } -#ifdef SYSIO_TRACING - if (err) - _sysio_cprintf("+NS init+ failed at expr %u (last = %s): %s\n", - count, - tok && *tok ? tok : "NULL", - strerror(-err)); -#endif - free(tok); - return err; -} - -#ifdef DEFER_INIT_CWD -/* - * Set deferred initial working directory. - */ -static int -_sysio_boot_cwd(const char *arg) -{ - - _sysio_init_cwd = arg; - return 0; -} -#endif - -/* - * Given an identifier and it's arguments, perform optional initializations. - */ -int -_sysio_boot(const char *opt, const char *arg) -{ - struct option_value_info vec[] = { -#ifdef SYSIO_TRACING - { "trace", NULL }, /* tracing? */ -#endif - { "namespace", NULL }, /* init namespace? 
*/ -#ifdef DEFER_INIT_CWD - { "cwd", NULL }, /* init working dir */ -#endif - { NULL, NULL } - }; - struct option_value_info *v; - unsigned u; - static int (*f[])(const char *) = { -#ifdef SYSIO_TRACING - _sysio_boot_tracing, -#endif - _sysio_boot_namespace, -#ifdef DEFER_INIT_CWD - _sysio_boot_cwd, -#endif - NULL /* can't happen */ - }; - - for (v = vec, u = 0; v->ovi_name; v++, u++) - if (strcmp(v->ovi_name, opt) == 0) - break; - if (!v->ovi_name) - return -EINVAL; - return (*f[u])(arg); -} diff --git a/libsysio/src/inode.c b/libsysio/src/inode.c deleted file mode 100644 index fbe027dbd3b04d1194b1705dc46ade1a0ae731b3..0000000000000000000000000000000000000000 --- a/libsysio/src/inode.c +++ /dev/null @@ -1,981 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "fs.h" -#include "mount.h" -#include "inode.h" -#include "dev.h" - -/* - * Support for path and index nodes. - */ - -/* - * Size of all names bucket-hash table. - */ -#ifndef NAMES_TABLE_LEN -#define NAMES_TABLE_LEN 251 -#endif - -/* - * Desired i-nodes cache size is MAX_INODES_MULTIPLIER times the number - * of slots in the names hash table. - */ -#define MAX_INODES_MULTIPLIER 3 - -/* - * Active i-nodes in the system and the number of same. - */ -struct inodes_head _sysio_inodes; -static size_t n_inodes = 0; -/* - * Desired number of active i-nodes. - */ -static size_t max_inodes = (MAX_INODES_MULTIPLIER * NAMES_TABLE_LEN); - -/* - * System table for rapid access to component names. - */ -static LIST_HEAD(, pnode_base) names[NAMES_TABLE_LEN]; -/* - * Number of names tracked by the system. - */ -static size_t n_names = 0; -/* - * Desired number of base path nodes to maintain. - */ -static size_t max_names = (2 * NAMES_TABLE_LEN); - -/* - * Number of pnodes to grab per memory allocation when filling the - * free list. - */ -#define PNODES_PER_CHUNK ((8 * 1024) / sizeof(struct pnode) - 2) - -#ifdef ZERO_SUM_MEMORY -/* - * Allocation information for pnodes bulk allocation. 
- */ -struct pnodes_block { - LIST_ENTRY(pnodes_block) pnblk_links; - struct pnode pnblk_nodes[PNODES_PER_CHUNK]; -}; - -static LIST_HEAD( ,pnodes_block) pnblocks; -#endif - -/* - * List of all path-nodes (aliases) referenced by any tree. - */ -struct pnodes_head _sysio_pnodes; - -/* - * Free path-nodes -- Not referenced by any tree for fas reuse. - */ -static LIST_HEAD( ,pnode) free_pnodes; - -/* - * The system root -- Aka `/'. - */ -struct pnode *_sysio_root = NULL; - -/* - * Initialize path and i-node support. Must be called before any other - * routine in this module. - */ -int -_sysio_i_init() -{ - unsigned i; - - TAILQ_INIT(&_sysio_inodes); - - for (i = 0; i < NAMES_TABLE_LEN; i++) - LIST_INIT(&names[i]); - -#ifdef ZERO_SUM_MEMORY - LIST_INIT(&pnblocks); -#endif - TAILQ_INIT(&_sysio_pnodes); - LIST_INIT(&free_pnodes); - - return 0; -} - -/* - * Garbage-collect idle i-nodes. We try to keep resource use limited to - * MAX_INODES_MULTIPLIER * max_names. - */ -static void -i_reclaim() -{ - struct inode *next, *ino; - size_t t; - - /* - * I just can't figure out a good way to reclaim these well without - * getting really fancy and using complex algorithms. The - * base nodes hold references on them for a long time and then - * release them. Those will age to the front of the queue and - * we have to skip over them. Oh well... - */ - t = MAX_INODES_MULTIPLIER * max_names; - if (max_inodes < t) { - /* - * Oops. Nope. We want more inodes than names entries. 
- */ - max_inodes = t; - return; - } - next = _sysio_inodes.tqh_first; - if (!next) - return; - t = max_inodes / 2; - do { - ino = next; - next = ino->i_nodes.tqe_next; - if (ino->i_ref || ino->i_immune) - continue; - _sysio_i_gone(ino); - } while (next && n_inodes > t); - - if (n_inodes > t) - max_inodes += t; -} - -static unsigned -hash(struct file_identifier *fid) -{ - size_t n; - unsigned char *ucp; - unsigned hkey; - - n = fid->fid_len; - ucp = fid->fid_data; - hkey = 0; - do { - hkey <<= 1; - hkey += *ucp++; - } while (--n); - return hkey; -} - -/* - * Allocate and initialize a new i-node. Returned i-node is referenced. - * - * NB: The passed file identifier is not copied. It is, therefor, up to the - * caller to assure that the value is static until the inode is destroyed. - */ -struct inode * -_sysio_i_new(struct filesys *fs, - struct file_identifier *fid, - struct intnl_stat *stat, - unsigned immunity, - struct inode_ops *ops, - void *private) -{ - struct inode *ino; - struct itable_entry *head; - struct inode_ops operations; - - if (n_inodes > max_inodes) { - /* - * Try to limit growth. - */ - i_reclaim(); - } - - ino = malloc(sizeof(struct inode)); - if (!ino) - return NULL; - ino->i_ops = *ops; - operations = *ops; - if (S_ISBLK(stat->st_mode) || - S_ISCHR(stat->st_mode) || - S_ISFIFO(stat->st_mode)) { - struct inode_ops *o; - - /* - * Replace some operations sent with - * those from the device table. 
- */ - o = _sysio_dev_lookup(stat->st_mode, stat->st_rdev); - operations.inop_open = o->inop_open; - operations.inop_close = o->inop_close; - operations.inop_read = o->inop_read; - operations.inop_write = o->inop_write; - operations.inop_pos = o->inop_pos; - operations.inop_iodone = o->inop_iodone; - operations.inop_fcntl = o->inop_fcntl; - operations.inop_datasync = o->inop_datasync; - operations.inop_ioctl = o->inop_ioctl; - } - I_INIT(ino, fs, stat, &operations, fid, immunity, private); - ino->i_ref = 1; - TAILQ_INSERT_TAIL(&_sysio_inodes, ino, i_nodes); - head = &fs->fs_itbl[hash(fid) % FS_ITBLSIZ]; - LIST_INSERT_HEAD(head, ino, i_link); - - n_inodes++; - assert(n_inodes); - - return ino; -} - -/* - * Find existing i-node given i-number and pointers to FS record - * and identifier. - */ -struct inode * -_sysio_i_find(struct filesys *fs, struct file_identifier *fid) -{ - struct inode *ino; - struct itable_entry *head; - - head = &fs->fs_itbl[hash(fid) % FS_ITBLSIZ]; - /* - * Look for existing. - */ - for (ino = head->lh_first; ino; ino = ino->i_link.le_next) - if (ino->i_fid->fid_len == fid->fid_len && - memcmp(ino->i_fid->fid_data, - fid->fid_data, - fid->fid_len) == 0) { - I_REF(ino); - break; - } - - return ino; -} - -/* - * Force reclaim of idle i-node. - */ -void -_sysio_i_gone(struct inode *ino) -{ - - if (ino->i_ref) - abort(); - if (!ino->i_zombie) - LIST_REMOVE(ino, i_link); - TAILQ_REMOVE(&_sysio_inodes, ino, i_nodes); - (*ino->i_ops.inop_gone)(ino); - free(ino); - - assert(n_inodes); - n_inodes--; -} - -/* - * Stale inode, zombie it and move it out of the way - */ -void -_sysio_i_undead(struct inode *ino) -{ - - if (ino->i_zombie) - return; - LIST_REMOVE(ino, i_link); - ino->i_zombie = 1; -} - -/* - * Garbage collect idle path (and base path) nodes tracked by the system. 
- */ -static void -p_reclaim() -{ - struct pnode *next, *pno; - size_t t; - - next = _sysio_pnodes.tqh_first; - if (!next) - return; - t = max_names / 2; - do { - pno = next; - if (pno->p_ref) { - next = pno->p_nodes.tqe_next; - continue; - } - pno->p_ref++; - assert(pno->p_ref); - (void )_sysio_p_prune(pno); - next = pno->p_nodes.tqe_next; - assert(pno->p_ref); - pno->p_ref--; - if (pno->p_ref) - continue; - (void )_sysio_p_prune(pno); - } while (n_names > t && next); - - if (n_names > t) - max_names += t; -} - -/* - * Allocate and initialize a new base path node. - */ -struct pnode_base * -_sysio_pb_new(struct qstr *name, struct pnode_base *parent, struct inode *ino) -{ - struct pnode_base *pb; - - if (n_names > max_names) { - /* - * Try to limit growth. - */ - p_reclaim(); - } - - pb = malloc(sizeof(struct pnode_base) + name->len); - if (!pb) - return NULL; - - pb->pb_name.name = NULL; - pb->pb_name.len = name->len; - if (pb->pb_name.len) { - char *cp; - - /* - * Copy the passed name. - * - * We have put the space for the name immediately behind - * the record in order to maximize spatial locality. - */ - cp = (char *)pb + sizeof(struct pnode_base); - (void )strncpy(cp, name->name, name->len); - pb->pb_name.name = cp; - assert(name->hashval); - pb->pb_name.hashval = name->hashval; - LIST_INSERT_HEAD(&names[name->hashval % NAMES_TABLE_LEN], - pb, - pb_names); - } - pb->pb_ino = ino; - LIST_INIT(&pb->pb_children); - LIST_INIT(&pb->pb_aliases); - if (parent) - LIST_INSERT_HEAD(&parent->pb_children, pb, pb_sibs); - pb->pb_parent = parent; - - n_names++; - assert(n_names); - - return pb; -} - -/* - * Destroy base path node, releasing resources back to the system. - * - * NB: Caller must release the inode referenced by the record. 
- */ -static void -pb_destroy(struct pnode_base *pb) -{ - - assert(n_names); - n_names--; - - assert(!pb->pb_aliases.lh_first); - assert(!pb->pb_children.lh_first); - assert(!pb->pb_ino); - if (pb->pb_name.len) - LIST_REMOVE(pb, pb_names); - if (pb->pb_parent) - LIST_REMOVE(pb, pb_sibs); - -#ifndef NDEBUG - /* - * This can help us catch pb-nodes that are free'd redundantly. - */ - pb->pb_name.hashval = 0; -#endif - free(pb); -} - -/* - * Force reclaim of idle base path node. - */ -void -_sysio_pb_gone(struct pnode_base *pb) -{ - - if (pb->pb_ino) - I_RELE(pb->pb_ino); - pb->pb_ino = NULL; - - pb_destroy(pb); -} - -/* - * Generate more path (alias) nodes for the fast allocator. - */ -static void -more_pnodes() -{ - size_t n; -#ifdef ZERO_SUM_MEMORY - struct pnodes_block *pnblk; -#endif - struct pnode *pno; - -#ifdef ZERO_SUM_MEMORY - pnblk = malloc(sizeof(struct pnodes_block)); - pno = NULL; - if (pnblk) { - LIST_INSERT_HEAD(&pnblocks, pnblk, pnblk_links); - pno = pnblk->pnblk_nodes; - } -#else - pno = malloc(PNODES_PER_CHUNK * sizeof(struct pnode)); -#endif - if (!pno) - return; - n = PNODES_PER_CHUNK; - do { - LIST_INSERT_HEAD(&free_pnodes, pno, p_links); - pno++; - } while (--n); -} - -#ifdef ZERO_SUM_MEMORY -/* - * Shutdown - */ -void -_sysio_i_shutdown() -{ - struct pnodes_block *pnblk; - - while ((pnblk = pnblocks.lh_first)) { - LIST_REMOVE(pnblk, pnblk_links); - free(pnblk); - } -} -#endif - -/* - * Allocate, initialize and establish appropriate links for new path (alias) - * node. 
- */ -struct pnode * -_sysio_p_new_alias(struct pnode *parent, - struct pnode_base *pb, - struct mount *mnt) -{ - struct pnode *pno; - - assert(!pb->pb_name.name || pb->pb_name.hashval); - - pno = free_pnodes.lh_first; - if (!pno) { - more_pnodes(); - pno = free_pnodes.lh_first; - } - if (!pno) - return NULL; - LIST_REMOVE(pno, p_links); - - pno->p_ref = 1; - pno->p_parent = parent; - if (!pno->p_parent) - pno->p_parent = pno; - pno->p_base = pb; - pno->p_mount = mnt; - pno->p_cover = NULL; - LIST_INSERT_HEAD(&pb->pb_aliases, pno, p_links); - TAILQ_INSERT_TAIL(&_sysio_pnodes, pno, p_nodes); - - return pno; -} - -/* - * For reclamation of idle path (alias) node. - */ -void -_sysio_p_gone(struct pnode *pno) -{ - struct pnode_base *pb; - - assert(!pno->p_ref); - assert(!pno->p_cover); - - TAILQ_REMOVE(&_sysio_pnodes, pno, p_nodes); - LIST_REMOVE(pno, p_links); - - pb = pno->p_base; - if (!(pb->pb_aliases.lh_first || pb->pb_children.lh_first)) - _sysio_pb_gone(pb); - - LIST_INSERT_HEAD(&free_pnodes, pno, p_links); -} - -/* - * (Re)Validate passed path node. - */ -int -_sysio_p_validate(struct pnode *pno, struct intent *intnt, const char *path) -{ - struct inode *ino; - struct pnode_base *rootpb; - int err; - - ino = pno->p_base->pb_ino; - /* - * An invalid pnode will not have an associated inode. We'll use - * the FS root inode, then -- It *must* be valid. - */ - rootpb = pno->p_mount->mnt_root->p_base; - assert(rootpb->pb_ino); - err = - rootpb->pb_ino->i_ops.inop_lookup(pno, - &ino, - intnt, - path); - /* - * If the inode lookup returns a different inode, release the old if - * present and point to the new. - */ - if (err || pno->p_base->pb_ino != ino) { - if (pno->p_base->pb_ino) - I_RELE(pno->p_base->pb_ino); - pno->p_base->pb_ino = ino; - } - return err; -} - -/* - * Find (or create!) an alias for the given parent and name. A misnomer, - * really -- This is a "get". Returned path node is referenced. 
- */ -int -_sysio_p_find_alias(struct pnode *parent, - struct qstr *name, - struct pnode **pnop) -{ - struct pnode_base *pb; - int err; - struct pnode *pno; - - /* - * Find the named child. - */ - if (name->len) { - /* - * Try the names table. - */ - pb = names[name->hashval % NAMES_TABLE_LEN].lh_first; - while (pb) { - if (pb->pb_parent == parent->p_base && - pb->pb_name.len == name->len && - strncmp(pb->pb_name.name, - name->name, - name->len) == 0) - break; - pb = pb->pb_names.le_next; - } - } else { - /* - * Brute force through the parent's list of children. - */ - pb = parent->p_base->pb_children.lh_first; - while (pb) { - if (pb->pb_parent == parent->p_base && - pb->pb_name.len == name->len && - strncmp(pb->pb_name.name, - name->name, - name->len) == 0) - break; - pb = pb->pb_sibs.le_next; - } - } - if (!pb) { - /* - * None found, create new child. - */ - pb = _sysio_pb_new(name, parent->p_base, NULL); - if (!pb) - return -ENOMEM; - } - /* - * Now find the proper alias. It's the one with the passed - * parent. - */ - err = 0; - pno = pb->pb_aliases.lh_first; - while (pno) { - if (pno->p_parent == parent) { - P_REF(pno); - break; - } - pno = pno->p_links.le_next; - } - if (!pno) { - /* - * Hmm. No alias. Just create an invalid one, to be - * validated later. - */ - pno = _sysio_p_new_alias(parent, pb, parent->p_mount); - if (!pno) - err = -ENOMEM; - } - if (!err) - *pnop = pno; - return err; -} - -/* - * Prune idle path base nodes freom the passed sub-tree, including the root. - */ -static void -_sysio_prune(struct pnode_base *rpb) -{ - struct pnode_base *nxtpb, *pb; - - nxtpb = rpb->pb_children.lh_first; - while ((pb = nxtpb)) { - nxtpb = pb->pb_sibs.le_next; - if (pb->pb_aliases.lh_first) - continue; - if (pb->pb_children.lh_first) { - _sysio_prune(pb); - continue; - } - _sysio_pb_gone(pb); - } - if (rpb->pb_children.lh_first) - return; - _sysio_pb_gone(rpb); -} - -/* - * Prune idle nodes from the passed sub-tree, including the root. 
- * - * Returns the number of aliases on the same mount that could not be pruned. - * i.e. a zero return means the entire sub-tree is gone. - */ -size_t -_sysio_p_prune(struct pnode *root) -{ - size_t count; - struct pnode_base *nxtpb, *pb; - struct pnode *nxtpno, *pno; - - count = 0; - nxtpb = root->p_base->pb_children.lh_first; - while ((pb = nxtpb)) { - nxtpb = pb->pb_sibs.le_next; - nxtpno = pb->pb_aliases.lh_first; - if (!nxtpno) { - _sysio_prune(pb); - continue; - } - while ((pno = nxtpno)) { - nxtpno = pno->p_links.le_next; - if (pno->p_mount != root->p_mount) { - /* - * Not the alias we were looking for. - */ - continue; - } - if (pno->p_base->pb_children.lh_first) { - /* - * Node is interior. Recurse. - */ - count += _sysio_p_prune(pno); - continue; - } - if (pno->p_ref) { - /* - * Can't prune; It's active. - */ - count++; - continue; - } - assert(!pno->p_cover); /* covered => ref'd! */ - assert(!pno->p_base->pb_name.name || - pno->p_base->pb_name.hashval); - /* - * Ok to prune. - */ - if (pno->p_mount->mnt_root == pno) { -#ifndef AUTOMOUNT_FILE_NAME - count++; - continue; -#else - /* - * This is an automount-point. Must - * unmount before relcaim. - */ - P_REF(pno); - if (_sysio_do_unmount(pno->p_mount) != 0) { - P_RELE(pno); - count++; - } - continue; -#endif - } - _sysio_p_gone(pno); - } - } - - if (count) { - /* - * Can't get the root or we disconnect the sub-trees. - */ - return count + (root->p_ref ? 1 : 0); - } - - /* - * All that is left is the root. Try for it too. - */ - if (root->p_ref) { - count++; - } else if (root->p_mount->mnt_root == root) { -#ifndef AUTOMOUNT_FILE_NAME - count++; -#else - /* - * This is an automount-point. Must - * unmount before relcaim. - */ - P_REF(root); - if (_sysio_do_unmount(root->p_mount) != 0) { - P_RELE(root); - count++; - } -#endif - } else - _sysio_p_gone(root); - - return count; -} - -/* - * Return path tracked by the base path node ancestor chain. 
- * - * Remember, base path nodes track the path relative to the file system and - * path (alias) nodes track path relative to our name space -- They cross - * mount points. - */ -char * -_sysio_pb_path(struct pnode_base *pb, const char separator) -{ - char *buf; - size_t len, n; - struct pnode_base *tmp; - char *cp; - - /* - * First pass: Traverse to the root of the sub-tree, remembering - * lengths. - */ - len = 0; - tmp = pb; - do { - n = tmp->pb_name.len; - len += tmp->pb_name.len; - if (n) - len++; - tmp = tmp->pb_parent; - } while (tmp); - if (!len) - len++; - /* - * Alloc space. - */ - buf = malloc(len + 1); - if (!buf) - return NULL; - /* - * Fill in the path buffer -- Backwards, since we're starting - * from the end. - */ - cp = buf; - *cp = separator; - cp += len; - *cp = '\0'; /* NUL term */ - tmp = pb; - do { - cp -= tmp->pb_name.len; - n = tmp->pb_name.len; - if (n) { - (void )strncpy(cp, tmp->pb_name.name, n); - *--cp = separator; - } - tmp = tmp->pb_parent; - } while (tmp); - - return buf; -} - -/* - * Common set attributes routine. - */ -int -_sysio_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf) -{ - /* - * It is possible that pno is null (for ftruncate call). - */ - - if (pno) - assert(!ino || pno->p_base->pb_ino == ino); - if (!ino) - ino = pno->p_base->pb_ino; - assert(ino); - - if (pno && IS_RDONLY(pno)) - return -EROFS; - - /* - * Determining permission to change the attributes is - * difficult, at best. Just try it. - */ - return (*ino->i_ops.inop_setattr)(pno, ino, mask, stbuf); -} - -/* - * Do nothing. - */ -void -_sysio_do_noop() -{ - - return; -} - -/* - * Abort. 
- */ -void -_sysio_do_illop() -{ - - abort(); -} - -/* - * Return -EBADF - */ -int -_sysio_do_ebadf() -{ - - return -EBADF; -} - -/* - * Return -EINVAL - */ -int -_sysio_do_einval() -{ - - return -EINVAL; -} - -/* - * Return -ENOENT - */ -int -_sysio_do_enoent() -{ - - return -ENOENT; -} - -/* - * Return -ESPIPE - */ -int -_sysio_do_espipe() -{ - - return -ESPIPE; -} - -/* - * Return -EISDIR - */ -int -_sysio_do_eisdir() -{ - - return -EISDIR; -} - -/* - * Return -ENOSYS - */ -int -_sysio_do_enosys() -{ - - return -ENOSYS; -} - - -/* - * Return -ENODEV - */ -int -_sysio_do_enodev() -{ - - return -ENODEV; -} diff --git a/libsysio/src/ioctl.c b/libsysio/src/ioctl.c deleted file mode 100644 index b6934cb9e3febc6b754e4cc56ba18293cca8e99f..0000000000000000000000000000000000000000 --- a/libsysio/src/ioctl.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <errno.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" - -#include "sysio-symbols.h" - -int -SYSIO_INTERFACE_NAME(ioctl)(int fd, unsigned long request, ...) -{ - int err; - struct file *fil; - va_list ap; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = 0; - fil = _sysio_fd_find(fd); - if (!fil) { - err = -EBADF; - goto out; - } - - va_start(ap, request); - err = fil->f_ino->i_ops.inop_ioctl(fil->f_ino, request, ap); - va_end(ap); - -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - - -#ifdef __GLIBC__ -#undef __ioctl -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(ioctl), - PREPEND(__, SYSIO_INTERFACE_NAME(ioctl))) -#endif - -#ifdef BSD -#undef _ioctl -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(ioctl), - PREPEND(_, SYSIO_INTERFACE_NAME(ioctl))) -#endif diff --git a/libsysio/src/ioctx.c b/libsysio/src/ioctx.c deleted file mode 100644 index 6c066545f57005661e4829b2c52302a779e73f97..0000000000000000000000000000000000000000 --- a/libsysio/src/ioctx.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <sched.h> -#include <assert.h> -#include <sys/uio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" -#include "inode.h" - -#if defined(REDSTORM) -#include <catamount/do_iostats.h> -#endif - - -/* - * Asynchronous IO context support. - */ - -/* - * List of all outstanding (in-flight) asynch IO requests tracked - * by the system. - */ -static LIST_HEAD( ,ioctx) aioq; - -/* - * Free callback entry. 
- */ -#define cb_free(cb) free(cb) - -/* - * Initialization. Must be called before using any other routine in this - * module. - */ -int -_sysio_ioctx_init() -{ - - LIST_INIT(&aioq); - return 0; -} - -/* - * Enter an IO context onto the async IO events queue. - */ -void -_sysio_ioctx_enter(struct ioctx *ioctx) -{ - - LIST_INSERT_HEAD(&aioq, ioctx, ioctx_link); -} - -/* - * Allocate and initialize a new IO context. - */ -struct ioctx * -_sysio_ioctx_new(struct inode *ino, - int wr, - const struct iovec *iov, - size_t iovlen, - const struct intnl_xtvec *xtv, - size_t xtvlen) -{ - struct ioctx *ioctx; - - ioctx = malloc(sizeof(struct ioctx)); - if (!ioctx) - return NULL; - - I_REF(ino); - - IOCTX_INIT(ioctx, - 0, - wr, - ino, - iov, iovlen, - xtv, xtvlen); - - /* - * Link request onto the outstanding requests queue. - */ - _sysio_ioctx_enter(ioctx); - - return ioctx; -} - -/* - * Add an IO completion call-back to the end of the context call-back queue. - * These are called in iowait() as the last thing, right before the context - * is destroyed. - * - * They are called in order. Beware. - */ -int -_sysio_ioctx_cb(struct ioctx *ioctx, - void (*f)(struct ioctx *, void *), - void *data) -{ - struct ioctx_callback *entry; - - entry = malloc(sizeof(struct ioctx_callback)); - if (!entry) - return -ENOMEM; - - entry->iocb_f = f; - entry->iocb_data = data; - - TAILQ_INSERT_TAIL(&ioctx->ioctx_cbq, entry, iocb_next); - - return 0; -} - -/* - * Find an IO context given it's identifier. - * - * NB: This is dog-slow. If there are alot of these, we will need to change - * this implementation. - */ -struct ioctx * -_sysio_ioctx_find(void *id) -{ - struct ioctx *ioctx; - - for (ioctx = aioq.lh_first; ioctx; ioctx = ioctx->ioctx_link.le_next) - if (ioctx == id) - return ioctx; - - return NULL; -} - -/* - * Check if asynchronous IO operation is complete. 
- */ -int -_sysio_ioctx_done(struct ioctx *ioctx) -{ - - if (ioctx->ioctx_done) - return 1; - if (!(*ioctx->ioctx_ino->i_ops.inop_iodone)(ioctx)) - return 0; - ioctx->ioctx_done = 1; - return 1; -} - -/* - * Wait for asynchronous IO operation to complete, return status - * and dispose of the context. - * - * Note: - * The context is no longer valid after return. - */ -ssize_t -_sysio_ioctx_wait(struct ioctx *ioctx) -{ - ssize_t cc; - - /* - * Wait for async operation to complete. - */ - while (!_sysio_ioctx_done(ioctx)) { -#ifdef POSIX_PRIORITY_SCHEDULING - (void )sched_yield(); -#endif - } - - /* - * Get status. - */ - cc = ioctx->ioctx_cc; - if (cc < 0) - cc = -ioctx->ioctx_errno; - - /* - * Dispose. - */ - _sysio_ioctx_complete(ioctx); - - return cc; -} - -/* - * Free callback entry. - */ -void -_sysio_ioctx_cb_free(struct ioctx_callback *cb) -{ - - cb_free(cb); -} - -/* - * Complete an asynchronous IO request. - */ -void -_sysio_ioctx_complete(struct ioctx *ioctx) -{ - struct ioctx_callback *entry; - - - /* update IO stats */ - _SYSIO_UPDACCT(ioctx->ioctx_write, ioctx->ioctx_cc); - - /* - * Run the call-back queue. - */ - while ((entry = ioctx->ioctx_cbq.tqh_first)) { - TAILQ_REMOVE(&ioctx->ioctx_cbq, entry, iocb_next); - (*entry->iocb_f)(ioctx, entry->iocb_data); - cb_free(entry); - } - - /* - * Unlink from the file record's outstanding request queue. - */ - LIST_REMOVE(ioctx, ioctx_link); - - if (ioctx->ioctx_fast) - return; - - I_RELE(ioctx->ioctx_ino); - - free(ioctx); -} diff --git a/libsysio/src/iowait.c b/libsysio/src/iowait.c deleted file mode 100644 index 939b9216f7a3e5efd10442d5d549eeb8de626ba9..0000000000000000000000000000000000000000 --- a/libsysio/src/iowait.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" - -/* - * Asynch IO support for the API. - */ - -/* - * Poll status of asynch IO request. - */ -int -SYSIO_INTERFACE_NAME(iodone)(void *ioid) -{ - struct ioctx *ioctx; - int rc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - ioctx = _sysio_ioctx_find(ioid); - if (!ioctx) - SYSIO_INTERFACE_RETURN(-1, -EINVAL); - - rc = _sysio_ioctx_done(ioctx); - SYSIO_INTERFACE_RETURN(rc < 0 ? -1 : rc, rc < 0 ? 
rc : 0); -} - -/* - * Wait for completion of and return results from identified asynch IO - * request. - * - * The identifier is no longer valid after return. - */ -ssize_t -SYSIO_INTERFACE_NAME(iowait)(void *ioid) -{ - struct ioctx *ioctx; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - ioctx = _sysio_ioctx_find(ioid); - if (!ioctx) - SYSIO_INTERFACE_RETURN(-1, -EINVAL); - - cc = _sysio_ioctx_wait(ioctx); - SYSIO_INTERFACE_RETURN(cc < 0 ? -1 : cc, cc < 0 ? (int )cc : 0); -} diff --git a/libsysio/src/link.c b/libsysio/src/link.c deleted file mode 100644 index 638bd7e3be4fcb7c2f49cc56a1529652b66ef506..0000000000000000000000000000000000000000 --- a/libsysio/src/link.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "mount.h" -#include "inode.h" -#include "sysio-symbols.h" - -int -SYSIO_INTERFACE_NAME(link)(const char *oldpath, const char *newpath) -{ - struct intent intent; - int err; - struct pnode *old, *new; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - INTENT_INIT(&intent, 0, NULL, NULL); - err = _sysio_namei(_sysio_cwd, oldpath, 0, &intent, &old); - if (err) - goto out; - if (S_ISDIR(old->p_base->pb_ino->i_stbuf.st_mode)) { - err = -EPERM; - goto error1; - } - INTENT_INIT(&intent, INT_UPDPARENT, NULL, NULL); - new = NULL; - err = _sysio_namei(_sysio_cwd, newpath, ND_NEGOK, &intent, &new); - if (err) - goto error1; - if (new->p_base->pb_ino) { - err = -EEXIST; - goto error2; - } - if (old->p_mount->mnt_root != new->p_mount->mnt_root) { - err = -EXDEV; - goto error2; - } - /* - * Use the parent node operations to request the task in case the - * driver is implemented using differentiated inode operations based - * on file type, such as incore does. - */ - err = old->p_parent->p_base->pb_ino->i_ops.inop_link(old, new); - if (err) - goto error2; - /* - * The new p-node must be pointed at the inode referenced by the old. - */ - assert(!new->p_base->pb_ino && old->p_base->pb_ino); - new->p_base->pb_ino = old->p_base->pb_ino; - I_REF(new->p_base->pb_ino); - -error2: - P_RELE(new); -error1: - P_RELE(old); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef REDSTORM -#undef __link -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(link), - PREPEND(__, SYSIO_INTERFACE_NAME(link))) -#endif diff --git a/libsysio/src/lseek.c b/libsysio/src/lseek.c deleted file mode 100644 index 91d865dcaee8c719bb4582cd8b1d5ebdd708dbdf..0000000000000000000000000000000000000000 --- a/libsysio/src/lseek.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2005 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <errno.h> -#include <unistd.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" - -#include "sysio-symbols.h" - -_SYSIO_OFF_T -_sysio_lseek_prepare(struct file *fil, - _SYSIO_OFF_T offset, - int whence, - _SYSIO_OFF_T max) -{ - _SYSIO_OFF_T off, pos; - struct intnl_stat stbuf; - - off = -1; - switch (whence) { - - case SEEK_SET: - off = 0; - break; - case SEEK_CUR: - off = fil->f_pos; - break; - case SEEK_END: - { - int err; - - /* - * Don't blindly trust the attributes - * in the inode record for this. Give the - * driver a chance to refresh them. - */ - err = - (*fil->f_ino->i_ops.inop_getattr)(NULL, - fil->f_ino, - &stbuf); - if (err) - return err; - - } - off = stbuf.st_size; - break; - default: - return -EINVAL; - } - pos = off + offset; - if ((offset < 0 && -offset > off) || (offset > 0 && pos <= off)) - return -EINVAL; - if (pos >= max) - return -EOVERFLOW; - return pos; -} - -static _SYSIO_OFF_T -_sysio_lseek(struct file *fil, - _SYSIO_OFF_T offset, - int whence, - _SYSIO_OFF_T max) -{ - _SYSIO_OFF_T pos; - - pos = _sysio_lseek_prepare(fil, offset, whence, max); - if (pos < 0) - return pos; - pos = (fil->f_ino->i_ops.inop_pos)(fil->f_ino, pos); - if (pos < 0) - return pos; - fil->f_pos = pos; - return pos; -} - -#ifdef _LARGEFILE64_SOURCE -#undef lseek64 - -extern off64_t -SYSIO_INTERFACE_NAME(lseek64)(int fd, off64_t offset, int whence) -{ - struct file *fil; - off64_t off; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN((off64_t )-1, -EBADF); - off = _sysio_lseek(fil, offset, whence, _SEEK_MAX(fil)); - SYSIO_INTERFACE_RETURN(off < 0 ? (off64_t )-1 : off, - off < 0 ? 
(int )off : 0); - -} -#ifdef __GLIBC__ -#undef __lseek64 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(lseek64), - PREPEND(__, SYSIO_INTERFACE_NAME(lseek64))) -#endif -#ifdef REDSTORM -#undef __libc_lseek64 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(lseek64), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_lseek64))) -#endif -#endif - -#undef lseek - -extern off_t -SYSIO_INTERFACE_NAME(lseek)(int fd, off_t offset, int whence) -{ - struct file *fil; - _SYSIO_OFF_T off; - off_t rtn; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN((off_t )-1, -EBADF); - off = _sysio_lseek(fil, offset, whence, LONG_MAX); - if (off < 0) - SYSIO_INTERFACE_RETURN((off_t )-1, (int )off); - rtn = (off_t )off; - assert(rtn == off); - SYSIO_INTERFACE_RETURN(rtn, 0); -} - -#ifdef __GLIBC__ -#undef __lseek -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(lseek), - PREPEND(__, SYSIO_INTERFACE_NAME(lseek))) -#endif - -#ifdef __linux__ -#undef llseek -int -SYSIO_INTERFACE_NAME(llseek)(unsigned int fd __IS_UNUSED, - unsigned long offset_high __IS_UNUSED, - unsigned long offset_low __IS_UNUSED, - loff_t *result __IS_UNUSED, - unsigned int whence __IS_UNUSED) -{ - struct file *fil; - loff_t off; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - /* - * This is just plain goofy. - */ - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(-1, -EBADF); -#ifndef _LARGEFILE64_SOURCE - if (offset_high) { - /* - * We are using 32-bit internals. This just isn't - * going to work. 
- */ - SYSIO_INTERFACE_RETURN(-1, -EOVERFLOW); - } -#else - off = offset_high; - off <<= 32; - off |= offset_low; -#endif - off = _sysio_lseek(fil, off, whence, _SEEK_MAX(fil)); - if (off < 0) - SYSIO_INTERFACE_RETURN((off_t )-1, (int )off); - *result = off; - SYSIO_INTERFACE_RETURN(0, 0); -} - -#undef __llseek -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(llseek), - PREPEND(__, SYSIO_INTERFACE_NAME(llseek))) -#endif diff --git a/libsysio/src/mkdir.c b/libsysio/src/mkdir.c deleted file mode 100644 index a23e014c944634fe31feae728004419e8313fe31..0000000000000000000000000000000000000000 --- a/libsysio/src/mkdir.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "fs.h" -#include "mount.h" -#include "sysio-symbols.h" - -int -_sysio_mkdir(struct pnode *pno, mode_t mode) -{ - int err; - struct inode *parenti; - - if (pno->p_base->pb_ino) - return -EEXIST; - - err = _sysio_permitted(pno->p_parent, W_OK); - if (err) - return err; - - parenti = pno->p_parent->p_base->pb_ino; - assert(parenti); - return (*parenti->i_ops.inop_mkdir)(pno, mode); -} - -int -SYSIO_INTERFACE_NAME(mkdir)(const char *path, mode_t mode) -{ - int err; - struct intent intent; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - INTENT_INIT(&intent, INT_CREAT, &mode, NULL); - err = _sysio_namei(_sysio_cwd, path, ND_NEGOK, &intent, &pno); - if (err) - goto out; - - mode &= ~(_sysio_umask & 0777); /* apply umask */ - err = _sysio_mkdir(pno, mode); - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __mkdir -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(mkdir), - PREPEND(__, SYSIO_INTERFACE_NAME(mkdir))) -#endif diff --git a/libsysio/src/mknod.c b/libsysio/src/mknod.c deleted file mode 100644 index 51e5f7fa015ea53d3f4c5aa2d66f501ffe443cc0..0000000000000000000000000000000000000000 --- a/libsysio/src/mknod.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. 
- * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "fs.h" -#include "mount.h" - -#include "sysio-symbols.h" - -#undef mknod -#undef __xmknod - -/* - * Internal routine to make a device node. 
- */ -int -_sysio_mknod(struct pnode *pno, mode_t mode, dev_t dev) -{ - - if (pno->p_base->pb_ino) - return -EEXIST; - - /* - * Support only regular, character-special and fifos right now. - * (mode & S_IFMT) == 0 is the same as S_IFREG. - */ - if (!(S_ISREG(mode) || S_ISCHR(mode) || S_ISFIFO(mode))) - return -EINVAL; - - if (IS_RDONLY(pno)) - return -EROFS; - return (*pno->p_parent->p_base->pb_ino->i_ops.inop_mknod)(pno, - mode, - dev); -} - -int -PREPEND(__, SYSIO_INTERFACE_NAME(xmknod))(int __ver, - const char *path, - mode_t mode, - dev_t *dev) -{ - int err; - struct intent intent; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - if (__ver != _MKNOD_VER) { - err = -ENOSYS; - goto out; - } - - mode &= ~(_sysio_umask & 0777); /* apply umask */ - - INTENT_INIT(&intent, INT_CREAT, &mode, NULL); - err = _sysio_namei(_sysio_cwd, path, ND_NEGOK, &intent, &pno); - if (err) - goto out; - - err = _sysio_permitted(pno->p_parent, W_OK); - if (err) - goto error; - err = _sysio_mknod(pno, mode, *dev); -error: - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef _xmknod -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(xmknod)), - PREPEND(_, SYSIO_INTERFACE_NAME(xmknod))) -#endif - -static int -PREPEND(__, SYSIO_INTERFACE_NAME(mknod))(const char *path, - mode_t mode, - dev_t dev) -{ - - return PREPEND(__, SYSIO_INTERFACE_NAME(xmknod))(_MKNOD_VER, - path, - mode, - &dev); -} - -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(mknod)), - SYSIO_INTERFACE_NAME(mknod)) diff --git a/libsysio/src/module.mk b/libsysio/src/module.mk deleted file mode 100644 index ffd0c54eaa5ac66f0c6c631dd64c4929457a01d4..0000000000000000000000000000000000000000 --- a/libsysio/src/module.mk +++ /dev/null @@ -1,32 +0,0 @@ -# -# Note; Remove statvfs{,64}.c until we decide what to do with them. 
-# Lee; Tue Feb 24 09:37:32 EST 2004 -# - -if WITH_LUSTRE_HACK -FILE_SUPPORT = src/file_hack.c -else -FILE_SUPPORT = src/file.c -endif - -if WITH_LUSTRE_HACK -LUSTRE_SRCDIR_SRCS = src/stdlib.c -else -LUSTRE_SRCDIR_SRCS = -endif - -SRCDIR_SRCS = src/access.c src/chdir.c src/chmod.c \ - src/chown.c src/dev.c src/dup.c src/fcntl.c \ - src/fs.c src/fsync.c \ - src/getdirentries.c src/init.c src/inode.c \ - src/ioctl.c src/ioctx.c src/iowait.c \ - src/link.c src/lseek.c src/mkdir.c \ - src/mknod.c src/mount.c src/namei.c \ - src/open.c src/rw.c src/reconcile.c src/rename.c \ - src/rmdir.c src/stat64.c src/stat.c \ - src/stddir.c src/readdir.c src/readdir64.c \ - src/symlink.c src/readlink.c \ - src/truncate.c src/unlink.c src/utime.c \ - $(FILE_SUPPORT) $(LUSTRE_SRCDIR_SRCS) - -SRCDIR_EXTRA = src/module.mk diff --git a/libsysio/src/mount.c b/libsysio/src/mount.c deleted file mode 100644 index 6fed65506ad80940a3b90a82f0745f09c3b8005b..0000000000000000000000000000000000000000 --- a/libsysio/src/mount.c +++ /dev/null @@ -1,740 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#ifdef AUTOMOUNT_FILE_NAME -#include <fcntl.h> -#include <sys/uio.h> -#endif -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" -#include "fs.h" -#include "mount.h" -#include "inode.h" - -/* - * File system and volume mount support. - */ - -#ifdef AUTOMOUNT_FILE_NAME -/* - * Name of autmount specification file in a directory with - * the sticky-bit set. - */ -struct qstr _sysio_mount_file_name = { "", 0, 0 }; -#endif - -/* - * Active mounts. - */ -static LIST_HEAD(, mount) mounts; - -static int _sysio_sub_fsswop_mount(const char *source, - unsigned flags, - const void *data, - struct pnode *tocover, - struct mount **mntp); - -static struct fssw_ops _sysio_sub_fssw_ops = { - _sysio_sub_fsswop_mount -}; - -/* - * Initialization. Must be called before any other routine in this module. - */ -int -_sysio_mount_init() -{ - int err; - - LIST_INIT(&mounts); -#ifdef AUTOMOUNT_FILE_NAME - _sysio_next_component(AUTOMOUNT_FILE_NAME, &_sysio_mount_file_name); -#endif - - /* - * Register the sub-trees "file system" driver. 
- */ - err = _sysio_fssw_register("sub", &_sysio_sub_fssw_ops); - if (err) - return err; - - return 0; -} - -/* - * Mount rooted sub-tree somewhere in the existing name space. - */ -int -_sysio_do_mount(struct filesys *fs, - struct pnode_base *rootpb, - unsigned flags, - struct pnode *tocover, - struct mount **mntp) -{ - struct mount *mnt; - int err; - - /* - * It's really poor form to allow the new root to be a - * descendant of the pnode being covered. - */ - if (tocover) { - struct pnode_base *pb; - - for (pb = rootpb; - pb && pb != tocover->p_base; - pb = pb->pb_parent) - ; - if (pb == tocover->p_base) - return -EBUSY; - } - - /* - * Alloc - */ - mnt = malloc(sizeof(struct mount)); - if (!mnt) - return -ENOMEM; - err = 0; - /* - * Init enough to make the mount record usable to the path node - * generation routines. - */ - mnt->mnt_fs = fs; - if (fs->fs_flags & FS_F_RO) { - /* - * Propagate the read-only flag -- Whether they set it or not. - */ - flags |= MOUNT_F_RO; - } - mnt->mnt_flags = flags; - /* - * Get alias for the new root. - */ - mnt->mnt_root = - _sysio_p_new_alias(tocover ? tocover->p_parent : NULL, rootpb, mnt); - if (!mnt->mnt_root) { - err = -ENOMEM; - goto error; - } - /* - * It may have been a while since the root inode was validated; - * better validate again. And it better be a directory! - */ - err = _sysio_p_validate(mnt->mnt_root, NULL, NULL); - if (err) - goto error; - - if (!S_ISDIR(mnt->mnt_root->p_base->pb_ino->i_stbuf.st_mode)) { - err = -ENOTDIR; - goto error; - } - /* - * Cover up the mount point. - */ - mnt->mnt_covers = tocover; - if (!mnt->mnt_covers) { - /* - * New graph; It covers itself. 
- */ - mnt->mnt_covers = tocover = mnt->mnt_root; - } - assert(!tocover->p_cover); - tocover->p_cover = mnt->mnt_root; - - LIST_INSERT_HEAD(&mounts, mnt, mnt_link); - - *mntp = mnt; - return 0; - -error: - if (mnt->mnt_root) { - P_RELE(mnt->mnt_root); - _sysio_p_prune(mnt->mnt_root); - } - free(mnt); - return err; -} - -/* - * Remove mounted sub-tree from the system. - */ -int -_sysio_do_unmount(struct mount *mnt) -{ - struct pnode *root; - struct filesys *fs; - - root = mnt->mnt_root; - if (root->p_cover && root->p_cover != root) { - /* - * Active mount. - */ - return -EBUSY; - } - assert(mnt->mnt_covers->p_cover == root); - if (_sysio_p_prune(root) != 1) { - /* - * Active aliases. - */ - return -EBUSY; - } - /* - * We're committed. - * - * Drop ref of covered pnode and break linkage in name space. - */ - if (root->p_cover != root) - P_RELE(mnt->mnt_covers); - mnt->mnt_covers->p_cover = NULL; - LIST_REMOVE(mnt, mnt_link); - /* - * Kill the root. - */ - P_RELE(root); - root->p_cover = NULL; - _sysio_p_gone(root); - /* - * Release mount record resource. - */ - fs = mnt->mnt_fs; - free(mnt); - FS_RELE(fs); - - return 0; -} - -/* - * Establish the system name space. - */ -int -_sysio_mount_root(const char *source, - const char *fstype, - unsigned flags, - const void *data) -{ - struct fsswent *fssw; - int err; - struct mount *mnt; - - if (_sysio_root) - return -EBUSY; - - fssw = _sysio_fssw_lookup(fstype); - if (!fssw) - return -ENODEV; - - err = (*fssw->fssw_ops.fsswop_mount)(source, flags, data, NULL, &mnt); - if (err) - return err; - - _sysio_root = mnt->mnt_root; -#ifndef DEFER_INIT_CWD - /* - * It is very annoying to have to set the current working directory. - * So... If it isn't set, make it the root now. 
- */ - if (!_sysio_cwd) { - _sysio_cwd = _sysio_root; - P_REF(_sysio_cwd); - } -#endif - - return 0; -} - -int -_sysio_mount(struct pnode *cwd, - const char *source, - const char *target, - const char *filesystemtype, - unsigned long mountflags, - const void *data) -{ - int err; - struct fsswent *fssw; - struct intent intent; - struct pnode *tgt; - struct mount *mnt; - - /* - * Find the file system switch entry specified. - */ - fssw = _sysio_fssw_lookup(filesystemtype); - if (!fssw) - return -ENODEV; - - /* - * Look up the target path node. - */ - INTENT_INIT(&intent, INT_GETATTR, NULL, NULL); - err = _sysio_namei(cwd, target, 0, &intent, &tgt); - if (err) - return err; - - if (tgt == _sysio_root) { - /* - * Attempting to mount over root. - */ - err = -EBUSY; - } else { - /* - * Do the deed. - */ - err = - (*fssw->fssw_ops.fsswop_mount)(source, - mountflags, - data, - tgt, - &mnt); - } - if (err) - P_RELE(tgt); - return err; -} - -int -SYSIO_INTERFACE_NAME(mount)(const char *source, - const char *target, - const char *filesystemtype, - unsigned long mountflags, - const void *data) -{ - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = - _sysio_mount(_sysio_cwd, - source, - target, - filesystemtype, - mountflags, - data); - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -int -SYSIO_INTERFACE_NAME(umount)(const char *target) -{ - int err; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - /* - * Look up the target path node. - */ - err = _sysio_namei(_sysio_cwd, target, 0, NULL, &pno); - if (err) - goto out; - P_RELE(pno); /* was ref'd */ - - /* - * Do the deed. - */ -#if 0 - if (!pno->p_cover) { - err = -EINVAL; - goto error; - } -#endif - assert(pno->p_mount); - err = _sysio_do_unmount(pno->p_mount); - -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -/* - * Unmount all file systems -- Usually as part of shutting everything down. 
- */ -int -_sysio_unmount_all() -{ - int err; - struct mount *mnt, *nxt; - struct pnode *pno; - - err = 0; - nxt = mounts.lh_first; - while ((mnt = nxt)) { - nxt = mnt->mnt_link.le_next; - pno = mnt->mnt_root; - /* - * If this is an automount generated mount, the root - * has no reference. We can cause the dismount with a - * simple prune. - */ - if (!_sysio_p_prune(pno)) - continue; -#ifdef notdef - /* - * Need a ref but only if this is not the root of a - * disconnected graph. If it is, then it is covered by itself - * and, so, already referenced. - */ - if (pno->p_cover != pno) - P_REF(pno); -#endif - err = _sysio_do_unmount(mnt); - if (err) { -#ifdef notdef - if (pno->p_cover != pno) - P_RELE(pno); -#endif - break; - } - if (pno == _sysio_root) - _sysio_root = NULL; - } - - return err; -} - -static int -_sysio_sub_fsswop_mount(const char *source, - unsigned flags, - const void *data __IS_UNUSED, - struct pnode *tocover, - struct mount **mntp) -{ - int err; - struct nameidata nameidata; - struct mount *mnt; - - /* - * How can we make a sub-mount from nothing? - */ - if (!_sysio_root) - return -EBUSY; - - /* - * Lookup the source. - */ - ND_INIT(&nameidata, 0, source, _sysio_root, NULL); - err = _sysio_path_walk(_sysio_root, &nameidata); - if (err) - return err; - - /* - * Mount the rooted sub-tree at the given position. - */ - err = - _sysio_do_mount(nameidata.nd_pno->p_mount->mnt_fs, - nameidata.nd_pno->p_base, - nameidata.nd_pno->p_mount->mnt_flags & flags, - tocover, - &mnt); - - /* - * Clean up and return. - */ - if (!err) { - FS_REF(nameidata.nd_pno->p_mount->mnt_fs); - *mntp = mnt; - } - P_RELE(nameidata.nd_pno); - return err; -} - -#ifdef AUTOMOUNT_FILE_NAME -/* - * Parse automount specification formatted as: - * - * <fstype>:<source>[[ \t]+<comma-separated-mount-options>] - * - * NB: - * The buffer sent is (almost) always modified. 
- */ -static int -parse_automount_spec(char *s, char **fstyp, char **srcp, char **optsp) -{ - int err; - char *cp; - char *fsty, *src, *opts; - - err = 0; - - /* - * Eat leading white. - */ - while (*s && *s == ' ' && *s == '\t') - s++; - /* - * Get fstype. - */ - fsty = cp = s; - while (*cp && - *cp != ':' && - *cp != ' ' && - *cp != '\t' && - *cp != '\r' && - *cp != '\n') - cp++; - if (fsty == cp || *cp != ':') - goto error; - *cp++ = '\0'; - - s = cp; - /* - * Eat leading white. - */ - while (*s && *s == ' ' && *s == '\t') - s++; - /* - * Get source. - */ - src = cp = s; - while (*cp && - *cp != ' ' && - *cp != '\t' && - *cp != '\r' && - *cp != '\n') - cp++; - if (src == cp) - goto error; - if (*cp) - *cp++ = '\0'; - - s = cp; - /* - * Eat leading white. - */ - while (*s && *s == ' ' && *s == '\t') - s++; - /* - * Get opts. - */ - opts = cp = s; - while (*cp && - *cp != ' ' && - *cp != '\t' && - *cp != '\r' && - *cp != '\n') - cp++; - if (opts == cp) - opts = NULL; - if (*cp) - *cp++ = '\0'; - - if (*cp) - goto error; - - *fstyp = fsty; - *srcp = src; - *optsp = opts; - return 0; - -error: - return -EINVAL; -} - -/* - * Parse (and strip) system mount options. - */ -static char * -parse_opts(char *opts, unsigned *flagsp) -{ - unsigned flags; - char *src, *dst; - char *cp; - - flags = 0; - src = dst = opts; - for (;;) { - cp = src; - while (*cp && *cp != ',') - cp++; - if (src + 2 == cp && strncmp(src, "rw", 2) == 0) { - /* - * Do nothing. This is the default. - */ - src += 2; - } else if (src + 2 == cp && strncmp(src, "ro", 2) == 0) { - /* - * Read-only. - */ - flags |= MOUNT_F_RO; - src += 2; - } - else if (src + 4 == cp && strncmp(src, "auto", 4) == 0) { - /* - * Enable automounts. - */ - flags |= MOUNT_F_AUTO; - src += 4; - } - if (src < cp) { - /* - * Copy what we didn't consume. 
- */ - if (dst != opts) - *dst++ = ','; - do - *dst++ = *src++; - while (src != cp); - } - if (!*src) - break; - *dst = '\0'; - src++; /* skip comma */ - } - *dst = '\0'; - - *flagsp = flags; - return opts; -} - -/* - * Attempt automount over the given directory. - */ -int -_sysio_automount(struct pnode *mntpno) -{ - int err; - struct inode *ino; - struct iovec iovec; - struct ioctx iocontext; - struct intnl_xtvec xtvec; - ssize_t cc; - char *fstype, *source, *opts; - unsigned flags; - struct fsswent *fssw; - struct mount *mnt; - - /* - * Revalidate -- Paranoia. - */ - err = _sysio_p_validate(mntpno, NULL, NULL); - if (err) - return err; - - /* - * Read file content. - */ - ino = mntpno->p_base->pb_ino; - if (ino->i_stbuf.st_size > 64 * 1024) { - /* - * Let's be reasonable. - */ - return -EINVAL; - } - iovec.iov_base = malloc(ino->i_stbuf.st_size + 1); - if (!iovec.iov_base) - return -ENOMEM; - iovec.iov_len = ino->i_stbuf.st_size; - err = _sysio_open(mntpno, O_RDONLY, 0); - if (err) - goto out; - xtvec.xtv_off = 0; - xtvec.xtv_len = ino->i_stbuf.st_size; - IOCTX_INIT(&iocontext, - 1, - 0, - ino, - &iovec, 1, - &xtvec, 1); - _sysio_ioctx_enter(&iocontext); - err = (*ino->i_ops.inop_read)(ino, &iocontext); - if (err) { - _sysio_ioctx_complete(&iocontext); - (void )(*ino->i_ops.inop_close)(ino); - goto out; - } - cc = _sysio_ioctx_wait(&iocontext); - err = (*ino->i_ops.inop_close)(ino); - if (err) - goto out; - if (cc < 0) { - err = (int )cc; - goto out; - } - ((char *)iovec.iov_base)[cc] = '\0'; - - /* - * Parse. - */ - err = parse_automount_spec(iovec.iov_base, &fstype, &source, &opts); - if (err) - goto out; - flags = 0; - if (opts) - opts = parse_opts(opts, &flags); - - /* - * Find the file system switch entry specified. - */ - fssw = _sysio_fssw_lookup(fstype); - if (!fssw) { - err = -ENODEV; - goto out; - } - - /* - * Do the deed. 
- */ - P_REF(mntpno->p_parent); - err = - (*fssw->fssw_ops.fsswop_mount)(source, - flags, - opts, - mntpno->p_parent, - &mnt); - if (err) - P_RELE(mntpno->p_parent); - -out: - if (iovec.iov_base) - free(iovec.iov_base); - return err; -} -#endif diff --git a/libsysio/src/namei.c b/libsysio/src/namei.c deleted file mode 100644 index 7f8c1e45427df2615d8967b64fd5c9b986fe67d0..0000000000000000000000000000000000000000 --- a/libsysio/src/namei.c +++ /dev/null @@ -1,509 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <unistd.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "mount.h" -#include "inode.h" - -/* - * Parse next component in path. - */ -#ifndef AUTOMOUNT_FILE_NAME -static -#endif -void -_sysio_next_component(const char *path, struct qstr *name) -{ - while (*path == PATH_SEPARATOR) - path++; - name->name = path; - name->len = 0; - name->hashval = 0; - while (*path && *path != PATH_SEPARATOR) { - name->hashval = - 37 * name->hashval + *path++; - name->len++; - } -} - -/* - * Given parent, look up component. - */ -static int -lookup(struct pnode *parent, - struct qstr *name, - struct pnode **pnop, - struct intent *intnt, - const char *path, - int check_permissions) -{ - int err; - struct pnode *pno; - - if (!parent->p_base->pb_ino) - return -ENOTDIR; - - /* - * Sometimes we don't want to check permissions. At initialization - * time, for instance. - */ - if (check_permissions) { - err = _sysio_permitted(parent, X_OK); - if (err) - return err; - } - - /* - * Short-circuit `.' and `..'; We don't cache those. - */ - pno = NULL; - if (name->len == 1 && name->name[0] == '.') - pno = parent; - else if (name->len == 2 && name->name[0] == '.' && name->name[1] == '.') - pno = parent->p_parent; - if (pno) - P_REF(pno); - else { - /* - * Get cache entry then. 
- */ - err = _sysio_p_find_alias(parent, name, &pno); - if (err) - return err; - } - - /* - * While covered, move to the covering node. - */ - while (pno->p_cover && pno->p_cover != pno) { - struct pnode *cover; - - cover = pno->p_cover; - P_REF(cover); - P_RELE(pno); - pno = cover; - } - - *pnop = pno; - - /* - * (Re)validate the pnode. - */ - err = _sysio_p_validate(pno, intnt, path); - if (err) - return err; - - return 0; -} - -/* - * The meat. Walk an absolute or relative path, looking up each - * component. Various flags in the nameidata argument govern actions - * and return values/state. They are: - * - * ND_NOFOLLOW symbolic links are not followed - * ND_NEGOK if terminal/leaf does not exist, return - * path node (alias) anyway. - * ND_NOPERMCHECK do not check permissions - */ -int -_sysio_path_walk(struct pnode *parent, struct nameidata *nd) -{ - int err; - const char *path; - struct qstr this, next; - struct inode *ino; - - /* - * NULL path? - */ - if (!nd->nd_path) - return -EFAULT; - - /* - * Empty path? - */ - if (!*nd->nd_path) - return -ENOENT; - - /* - * Leading slash? - */ - if (*nd->nd_path == PATH_SEPARATOR) { - /* - * Make parent the root of the name space. - */ - parent = nd->nd_root; - } - -#ifdef DEFER_INIT_CWD - if (!parent) { - const char *icwd; - - if (!_sysio_init_cwd && !nd->nd_root) - abort(); - - /* - * Finally have to set the current working directory. We can - * not tolerate errors here or else risk leaving the process - * in a very unexpected location. We abort then unless all goes - * well. - */ - icwd = _sysio_init_cwd; - _sysio_init_cwd = NULL; - parent = nd->nd_root; - if (!parent) - abort(); - (void )_sysio_namei(nd->nd_root, icwd, 0, NULL, &parent); - if (_sysio_p_chdir(parent) != 0) - abort(); - } -#endif - - /* - * (Re)Validate the parent. - */ - err = _sysio_p_validate(parent, NULL, NULL); - if (err) - return err; - - /* - * Prime everything for the loop. Will need another reference to the - * initial directory. 
It'll be dropped later. - */ - nd->nd_pno = parent; - P_REF(nd->nd_pno); - _sysio_next_component(nd->nd_path, &next); - path = next.name; - parent = NULL; - err = 0; - - /* - * Derecurse the path tree-walk. - */ - for (;;) { - ino = nd->nd_pno->p_base->pb_ino; - if (S_ISLNK(ino->i_stbuf.st_mode) && - (next.len || !(nd->nd_flags & ND_NOFOLLOW))) { - char *lpath; - ssize_t cc; - struct nameidata nameidata; - - if (nd->nd_slicnt >= MAX_SYMLINK) { - err = -ELOOP; - break; - } - - /* - * Follow symbolic link. - */ - lpath = malloc(MAXPATHLEN + 1); - if (!lpath) { - err = -ENOMEM; - break; - } - cc = - ino->i_ops.inop_readlink(nd->nd_pno, - lpath, - MAXPATHLEN); - if (cc < 0) { - free(lpath); - err = (int )cc; - break; - } - lpath[cc] = '\0'; /* NUL term */ - /* - * Handle symbolic links with recursion. Yuck! - * Pass the NULL intent for recursive symlink - * except the last component. - */ - ND_INIT(&nameidata, - (nd->nd_flags | ND_NEGOK), - lpath, - nd->nd_root, - !next.len ? nd->nd_intent : NULL); - nameidata.nd_slicnt = nd->nd_slicnt + 1; - err = - _sysio_path_walk(nd->nd_pno->p_parent, &nameidata); - free(lpath); - if (err) - break; - P_RELE(nd->nd_pno); - nd->nd_pno = nameidata.nd_pno; - ino = nd->nd_pno->p_base->pb_ino; - } -#ifdef AUTOMOUNT_FILE_NAME - else if (ino && - S_ISDIR(ino->i_stbuf.st_mode) && - (nd->nd_pno->p_mount->mnt_flags & MOUNT_F_AUTO) && - nd->nd_amcnt < MAX_MOUNT_DEPTH && - ino->i_stbuf.st_mode & S_ISUID) { - struct pnode *pno; - - /* - * We're committed to a lookup. It's time to see if - * we're going to do it in an automount-point and - * arrange the mount if so. - */ - assert(!nd->nd_pno->p_cover); - err = - lookup(nd->nd_pno, - &_sysio_mount_file_name, - &pno, - NULL, - NULL, - 1); - if (pno) - P_RELE(pno); - if (!err && _sysio_automount(pno) == 0) { - struct pnode *root; - - /* - * All went well. Need to switch - * parent pno and ino to the - * root of the newly mounted sub-tree. - * - * NB: - * We don't recurseively retry these - * things. 
It's OK to have the new root - * be an automount-point but it's going - * to take another lookup to accomplish it. - * The alternative could get us into an - * infinite loop. - */ - root = nd->nd_pno->p_cover; - assert(root); - P_RELE(nd->nd_pno); - nd->nd_pno = root; -#if 0 - P_REF(nd->nd_pno); -#endif - ino = nd->nd_pno->p_base->pb_ino; - assert(ino); - - /* - * Must send the intent-path again. - */ - path = nd->nd_path; - nd->nd_amcnt++; - - /* - * Must go back top and retry with this - * new pnode as parent. - */ - continue; - } - err = 0; /* it never happened */ - } -#endif - - /* - * Set up for next component. - */ - this = next; - if (path) - path = this.name; - if (!this.len) - break; - if (!ino) { - /* - * Should only be here if final component was - * target of a symlink. - */ - nd->nd_path = this.name + this.len; - err = -ENOENT; - break; - } - nd->nd_path = this.name + this.len; - _sysio_next_component(nd->nd_path, &next); - parent = nd->nd_pno; - nd->nd_pno = NULL; - - /* - * Parent must be a directory. - */ - if (ino && !S_ISDIR(ino->i_stbuf.st_mode)) { - err = -ENOTDIR; - break; - } - - /* - * The extra path arg is passed only on the first lookup in the - * walk as we cross into each file system, anew. The intent is - * passed both on the first lookup and when trying to look up - * the final component -- Of the original path, not on the - * file system. - * - * Confused? Me too and I came up with this weirdness. It's - * hints to the file system drivers. Read on. - * - * The first lookup will give everything one needs to ready - * everything for the entire operation before the path is - * walked. The file system driver knows it's the first lookup - * in the walk because it has both the path and the intent. - * - * Alternatively, one could split the duties; The first lookup - * can be used to prime the file system inode cache with the - * interior nodes we'll want in the path-walk. 
Then, when - * looking up the last component, ready everything for the - * operations(s) to come. The file system driver knows it's - * the last lookup in the walk because it has the intent, - * again, but without the path. - * - * One special case; If we were asked to look up a single - * component, we treat it as the last component. The file - * system driver never sees the extra path argument. It should - * be noted that the driver always has the fully qualified - * path, on the target file system, available to it for any - * node it is looking up, including the last, via the base - * path node and it's ancestor chain. - */ - err = - lookup(parent, - &this, - &nd->nd_pno, - (path || !next.len) - ? nd->nd_intent - : NULL, - (path && next.len) ? path : NULL, - !(nd->nd_flags & ND_NOPERMCHECK)); - if (err) { - if (err == -ENOENT && - !next.len && - (nd->nd_flags & ND_NEGOK)) - err = 0; - break; - } - path = NULL; /* Stop that! */ - if ((parent->p_mount->mnt_fs != - nd->nd_pno->p_mount->mnt_fs)) { - /* - * Crossed into a new fs. We'll want the next lookup - * to include the path again. - */ - path = nd->nd_path; - } - - /* - * Release the parent. - */ - P_RELE(parent); - parent = NULL; - } - - /* - * Trailing separators cause us to break from the loop with - * a parent set but no pnode. Check for that. - */ - if (!nd->nd_pno) { - nd->nd_pno = parent; - parent = NULL; - /* - * Make sure the last processed component was a directory. The - * trailing slashes are illegal behind anything else. - */ - if (!(err || - S_ISDIR(nd->nd_pno->p_base->pb_ino->i_stbuf.st_mode))) - err = -ENOTDIR; - } - - /* - * Drop reference to parent if set. Either we have a dup of the original - * parent or an intermediate reference. - */ - if (parent) - P_RELE(parent); - - /* - * On error, we will want to drop our reference to the current - * path node if at end. 
- */ - if (err && nd->nd_pno) { - P_RELE(nd->nd_pno); - nd->nd_pno = NULL; - } - - return err; -} - -#ifdef CPLANT_YOD -/* - * for backward compatibility w/protocol switch - * remove everything up to the first ':' - * fortran libs prepend cwd to path, so not much choice - */ -#define STRIP_PREFIX(p) strchr(p,':') ? strchr(p,':')+1 : p -#else -#define STRIP_PREFIX(p) p -#endif - -/* - * Expanded form of the path-walk routine, with the common arguments, builds - * the nameidata bundle and calls path-walk. - */ -int -_sysio_namei(struct pnode *parent, - const char *path, - unsigned flags, - struct intent *intnt, - struct pnode **pnop) -{ - struct nameidata nameidata; - int err; - - ND_INIT(&nameidata, flags, STRIP_PREFIX(path), _sysio_root, intnt); - err = _sysio_path_walk(parent, &nameidata); - if (!err) - *pnop = nameidata.nd_pno; - return err; -} diff --git a/libsysio/src/open.c b/libsysio/src/open.c deleted file mode 100644 index 86426dace65312f7b4f441c1fd4aad7feb3d9d07..0000000000000000000000000000000000000000 --- a/libsysio/src/open.c +++ /dev/null @@ -1,308 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Incorporate the GNU flags for open if we can. - */ -#define _GNU_SOURCE - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" -#include "fs.h" -#include "mount.h" -#include "sysio-symbols.h" - -/* - * Open file support. - */ - -mode_t _sysio_umask = 0; /* process umask. */ - -/* - * Internal form of open. - */ -int -_sysio_open(struct pnode *pno, int flags, mode_t mode) -{ - int ro; - int w; - int err; - struct inode *ino; - - ro = IS_RDONLY(pno); - w = flags & (O_WRONLY|O_RDWR); - if (w == (O_WRONLY|O_RDWR)) { - /* - * Huh? - */ - return -EINVAL; - } - if (w && ro) - return -EROFS; - ino = pno->p_base->pb_ino; - if ((flags & O_CREAT) && !ino) { - struct pnode *parent; - - /* - * Must create it. 
- */ - if (ro) - return -EROFS; - parent = pno->p_parent; - err = _sysio_p_validate(parent, NULL, NULL); - if (!err) { - ino = parent->p_base->pb_ino; - assert(ino); - err = (*ino->i_ops.inop_open)(pno, flags, mode); - } - } else if ((flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) - err = -EEXIST; - else if (!ino) - err = _sysio_p_validate(pno, NULL, NULL); -#ifdef O_NOFOLLOW - else if (flags & O_NOFOLLOW && S_ISLNK(ino->i_stbuf.st_mode)) - err = -ELOOP; -#endif - else { - /* - * Simple open of pre-existing file. - */ - err = (*ino->i_ops.inop_open)(pno, flags, mode); - } - - return err; -} - -#undef open - -int -SYSIO_INTERFACE_NAME(open)(const char *path, int flags, ...) -{ - mode_t mode; - unsigned ndflags; - struct intent intent; - int rtn; - struct pnode *pno; - struct file *fil; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - /* - * Get mode argument and determine parameters for namei - */ - mode = 0; - ndflags = 0; - intent.int_opmask = INT_OPEN; - if (flags & O_CREAT) { - va_list ap; - - /* - * Set ndflags to indicate return of negative alias is OK. - */ - ndflags |= ND_NEGOK; - - /* - * Will need mode too. - */ - va_start(ap, flags); - mode = -#ifndef REDSTORM - va_arg(ap, mode_t); -#else - va_arg(ap, int); -#endif - va_end(ap); - mode &= ~(_sysio_umask & 0777) | 07000; /* apply umask */ - intent.int_opmask |= INT_CREAT; - } -#ifdef O_NOFOLLOW - if (flags & O_NOFOLLOW) - ndflags |= ND_NOFOLLOW; -#endif - - /* - * Find the file. - */ - fil = NULL; - INTENT_INIT(&intent, intent.int_opmask, &mode, &flags); - pno = NULL; - rtn = _sysio_namei(_sysio_cwd, path, ndflags, &intent, &pno); - if (rtn) - goto error; - /* - * Ask for the open/creat. - */ - rtn = _sysio_open(pno, flags, mode); - if (rtn) - goto error; - /* - * Get a file descriptor. 
- */ - fil = _sysio_fnew(pno->p_base->pb_ino, flags); - if (!fil) { - rtn = -ENOMEM; - goto error; - } - rtn = _sysio_fd_set(fil, -1, 0); - if (rtn < 0) - goto error; - - P_RELE(pno); - - SYSIO_INTERFACE_RETURN(rtn, 0); - -error: - if (fil) - F_RELE(fil); - if (pno) - P_RELE(pno); - SYSIO_INTERFACE_RETURN(-1, rtn); -} - -#ifdef __GLIBC__ -#undef __open -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(open), - PREPEND(__, SYSIO_INTERFACE_NAME(open))) -#undef open64 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(open), SYSIO_INTERFACE_NAME(open64)) -#undef __open64 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(open), - PREPEND(__, SYSIO_INTERFACE_NAME(open64))) -#endif - -#ifdef REDSTORM -#undef __libc_open64 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(open), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_open64))) -#endif - -#ifdef BSD -#undef _open -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(open), - PREPEND(_, SYSIO_INTERFACE_NAME(open))) -#endif - -int -SYSIO_INTERFACE_NAME(close)(int fd) -{ - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = _sysio_fd_close(fd); - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef __GLIBC__ -#undef __close -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(close), - PREPEND(__, SYSIO_INTERFACE_NAME(close))) -#endif - -#ifdef BSD -#undef _close -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(close), - PREPEND(_, SYSIO_INTERFACE_NAME(close))) -#endif - -int -SYSIO_INTERFACE_NAME(creat)(const char *path, mode_t mode) -{ - - return SYSIO_INTERFACE_NAME(open)(path, O_CREAT|O_WRONLY|O_TRUNC, mode); -} - -#ifdef __GLIBC__ -#undef __creat -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(creat), - PREPEND(__, SYSIO_INTERFACE_NAME(creat))) -#undef creat64 -#ifndef HAVE_LUSTRE_HACK -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(creat), SYSIO_INTERFACE_NAME(creat64)) -#else -/* XXX workaround SuSE SLES 8, glibc-2.2.5 */ -sysio_sym_strong_alias(SYSIO_INTERFACE_NAME(creat), - SYSIO_INTERFACE_NAME(creat64)) -#endif -#undef __creat64 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(creat), - PREPEND(__, SYSIO_INTERFACE_NAME(creat64))) -#endif - -#ifdef REDSTORM -#undef __libc_creat -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(creat), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_creat))) -#endif - -#ifdef BSD -#undef _creat -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(creat), - PREPEND(_, SYSIO_INTERFACE_NAME(creat))) -#endif - -mode_t -SYSIO_INTERFACE_NAME(umask)(mode_t mask) -{ - mode_t omask; - - omask = _sysio_umask; - _sysio_umask = mask & 0777; - return omask; -} - -#ifdef REDSTORM -#undef __umask -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(umask), - PREPEND(__, SYSIO_INTERFACE_NAME(umask))) -#endif diff --git a/libsysio/src/readdir.c b/libsysio/src/readdir.c deleted file mode 100644 index c232b2928dc816235183d956ceb24c5c9d8b7e08..0000000000000000000000000000000000000000 --- a/libsysio/src/readdir.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at 
your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifdef __linux__ -#include <features.h> -#if defined(__GLIBC__) && !defined(REDSTORM) - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <fcntl.h> -#include <dirent.h> -#include <sysio.h> - -#include "sysio-symbols.h" - -#ifndef _READDIR -#define _READDIR SYSIO_INTERFACE_NAME(readdir) -#define _SCANDIR SYSIO_INTERFACE_NAME(scandir) -#define _GETDIRENTRIES SYSIO_INTERFACE_NAME(getdirentries) -#define _DIRENT_T struct dirent -#define _OFF_T off_t -#endif - -#include "stddir.h" - -_DIRENT_T * -_READDIR(DIR *dir) -{ - _DIRENT_T *dp = NULL; - _OFF_T dbase; - -#ifndef BSD - ssize_t rc; -#else - int rc; -#endif - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - - /* need to read new data? 
*/ - rc = 0; - if (dir->cur >= dir->effective) { - dir->cur = 0; - dbase = (_OFF_T )dir->base; - if (sizeof(dbase) != sizeof(dir->base) && - dbase != dir->base) { - dir->effective = 0; - SYSIO_INTERFACE_RETURN(NULL, -EOVERFLOW); - } - rc = _GETDIRENTRIES(dir->fd, - dir->buf, -#ifndef BSD - (size_t )BUFSIZE, - (_OFF_T *) &dbase); -#else - (int )BUFSIZE, - (long *) __restrict dbase); -#endif - dir->base = (_SYSIO_OFF_T )dbase; - - /* error or end-of-file */ - if (rc == -ENOENT) - rc = 0; - if (rc <= 0) { - dir->effective = 0; - SYSIO_INTERFACE_RETURN(NULL, rc); - } - dir->effective = rc; - } - dp = (_DIRENT_T *)(dir->buf + dir->cur); - -#ifdef _DIRENT_HAVE_D_RECLEN - dir->cur += dp->d_reclen; -#else - dir->cur += sizeof(_DIRENT_T); -#endif -#ifdef _DIRENT_HAVE_D_OFF - dir->filepos = dp->d_off; -#else - dir->filepos = dir->cur; -#endif - - SYSIO_INTERFACE_RETURN(dp, 0); -} - -sysio_sym_weak_alias(_READDIR, PREPEND(__,_READDIR)) - -int -_SCANDIR(const char *dirname, - _DIRENT_T ***namelist, - int (*filter) (const _DIRENT_T *), - int (*compar) (const void *, const void *)) -{ - DIR *dir = NULL; - _DIRENT_T *de = NULL, - *nextde = NULL, - **s = NULL; - int n = 32, i = 0; - size_t desize; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - - if ((dir = SYSIO_INTERFACE_NAME(opendir)(dirname)) == NULL) - SYSIO_INTERFACE_RETURN(-1, -errno); - - while ((de = _READDIR(dir)) != NULL) { - if ((filter == NULL) || filter(de)) { - if (i == 0 || i >= n) { - n = MAX(n, 2*i); - s = (_DIRENT_T **)realloc(s, - (size_t )(n * sizeof(_DIRENT_T *))); - if (!s) - SYSIO_INTERFACE_RETURN(-1, -ENOMEM); - } - desize = &de->d_name[_D_ALLOC_NAMLEN(de)] - (char * )de; - nextde = (_DIRENT_T *)malloc(desize); - if (!nextde) - SYSIO_INTERFACE_RETURN(-1, -ENOMEM); - - s[i++] = (_DIRENT_T *)memcpy(nextde, de, desize); - } - } - if (compar) - qsort (s, - i, - sizeof (*s), - (int (*)(const void *, const void *))compar); - - *namelist = s; - - SYSIO_INTERFACE_NAME(closedir)(dir); - - 
SYSIO_INTERFACE_RETURN(i, 0); -} - -sysio_sym_weak_alias(_SCANDIR, PREPEND(__,_SCANDIR)) - -#endif -#endif diff --git a/libsysio/src/readdir64.c b/libsysio/src/readdir64.c deleted file mode 100644 index f6d54a8fea44466e2b5c43698e19cc00a54dacd4..0000000000000000000000000000000000000000 --- a/libsysio/src/readdir64.c +++ /dev/null @@ -1,10 +0,0 @@ -#ifdef _LARGEFILE64_SOURCE -#define _SCANDIR SYSIO_INTERFACE_NAME(scandir64) -#define _READDIR SYSIO_INTERFACE_NAME(readdir64) -#define _GETDIRENTRIES SYSIO_INTERFACE_NAME(getdirentries64) -#define _DIRENT_T struct dirent64 -#define _OFF_T _SYSIO_OFF_T - -#include "readdir.c" - -#endif diff --git a/libsysio/src/readlink.c b/libsysio/src/readlink.c deleted file mode 100644 index c8e74366bf63049f6191089cd76439787558e8e9..0000000000000000000000000000000000000000 --- a/libsysio/src/readlink.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "sysio-symbols.h" - -#ifdef HAVE_POSIX_1003_READLINK -ssize_t -#else -int -#endif -SYSIO_INTERFACE_NAME(readlink)(const char *path, char *buf, size_t bufsiz) -{ - struct intent intent; - int err; - struct pnode *pno; - struct inode *ino; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - INTENT_INIT(&intent, INT_GETATTR, NULL, NULL); - err = _sysio_namei(_sysio_cwd, path, ND_NOFOLLOW, &intent, &pno); - if (err) - goto out; - ino = pno->p_base->pb_ino; - if (!S_ISLNK(ino->i_stbuf.st_mode)) { - err = -EINVAL; - goto error; - } - err = (*ino->i_ops.inop_readlink)(pno, buf, bufsiz); -error: - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err < 0 ? -1 : err, err >= 0 ? 
0 : err); -} - -#ifdef REDSTORM -#undef __readlink -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(readlink), - PREPEND(__, SYSIO_INTERFACE_NAME(readlink))) -#endif diff --git a/libsysio/src/reconcile.c b/libsysio/src/reconcile.c deleted file mode 100644 index 8fa01fdf38e5f5ec3d2d2a1ff914025890e83844..0000000000000000000000000000000000000000 --- a/libsysio/src/reconcile.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/uio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" - -/* - * Extent-vector IO support. - */ - -/* - * Arguments to IO vector enumerator callback when used by _sysio_doio(). - */ -struct doio_helper_args { - ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *); /* base func */ - void *arg; /* caller arg */ -}; - -/* - * General help validating strided-IO vectors. - * - * A driver may call this to make sure underflow/overflow of an off_t can't - * occur and overflow of a ssize_t can't occur when writing. The sum - * of the reconciled transfer length is returned or some appropriate - * error depending on underflow/overflow. - * - * The following algorithm assumes: - * - * a) sizeof(size_t) >= sizeof(ssize_t) - * b) 2's complement arithmetic - * c) The compiler won't optimize away code because it's developers - * believed that something with an undefined result in `C' can't happen. 
- */ -ssize_t -_sysio_validx(const struct intnl_xtvec *xtv, size_t xtvlen, - const struct iovec *iov, size_t iovlen, - _SYSIO_OFF_T limit) -{ - ssize_t acc, cc; - struct iovec iovec; - struct intnl_xtvec xtvec; - _SYSIO_OFF_T off; - - if (!(xtvlen && iovlen)) - return -EINVAL; - - acc = 0; - xtvec.xtv_len = iovec.iov_len = 0; - do { - while (!xtvec.xtv_len) { - if (!xtvlen--) - break; - if (!xtv->xtv_len) { - xtv++; - continue; - } - xtvec = *xtv++; - if (xtvec.xtv_off < 0) - return -EINVAL; - } - if (!xtvec.xtv_len) - break; - do { - while (!iovec.iov_len) { - if (!iovlen--) - break; - if (!iov->iov_len) { - iov++; - continue; - } - iovec = *iov++; - } - if (!iovec.iov_len) - break; - cc = iovec.iov_len; - if (cc < 0) - return -EINVAL; - if ((size_t )cc > xtvec.xtv_len) - cc = xtvec.xtv_len; - xtvec.xtv_len -= cc; - iovec.iov_len -= cc; - off = xtvec.xtv_off + cc; - if (xtvec.xtv_off && off <= xtvec.xtv_off) - return off < 0 ? -EINVAL : -EOVERFLOW; - if (off > limit) - return -EFBIG; - xtvec.xtv_off = off; - cc += acc; - if (acc && (cc <= acc)) - return -EINVAL; - acc = cc; - } while (xtvec.xtv_len && iovlen); - } while ((xtvlen || xtvec.xtv_len) && iovlen); - return acc; -} - -/* - */ -ssize_t -_sysio_enumerate_extents(const struct intnl_xtvec *xtv, size_t xtvlen, - const struct iovec *iov, size_t iovlen, - ssize_t (*f)(const struct iovec *, int, - _SYSIO_OFF_T, - ssize_t, - void *), - void *arg) -{ - ssize_t acc, tmp, cc; - struct iovec iovec; - struct intnl_xtvec xtvec; - const struct iovec *start; - _SYSIO_OFF_T off; - size_t n; - size_t remain; - - acc = 0; - iovec.iov_len = 0; - while (xtvlen) { - /* - * Coalesce contiguous extent vector entries. - */ - off = xtvec.xtv_off = xtv->xtv_off; - off += xtvec.xtv_len = xtv->xtv_len; - while (++xtv, --xtvlen) { - if (off != xtv->xtv_off) { - /* - * Not contiguous. - */ - break; - } - if (!xtv->xtv_len) { - /* - * Zero length. 
- */ - continue; - } - off += xtv->xtv_len; - xtvec.xtv_len += xtv->xtv_len; - } - while (xtvec.xtv_len) { - if (iovec.iov_len) { - tmp = iovec.iov_len; - if (iovec.iov_len > xtvec.xtv_len) - iovec.iov_len = xtvec.xtv_len; - cc = - (*f)(&iovec, 1, - xtvec.xtv_off, - xtvec.xtv_len, - arg); - if (cc <= 0) { - if (acc) - return acc; - return cc; - } - iovec.iov_base = (char *)iovec.iov_base + cc; - iovec.iov_len = tmp - cc; - tmp = cc + acc; - if (acc && tmp <= acc) - abort(); /* paranoia */ - acc = tmp; - } else if (iovlen) { - start = iov; - n = xtvec.xtv_len; - do { - if (iov->iov_len > n) { - /* - * That'll do. - */ - break; - } - n -= iov->iov_len; - iov++; - } while (--iovlen); - if (iov == start) { - iovec = *iov++; - iovlen--; - continue; - } - remain = xtvec.xtv_len - n; - cc = - (*f)(start, iov - start, - xtvec.xtv_off, - remain, - arg); - if (cc <= 0) { - if (acc) - return acc; - return cc; - } - - tmp = cc + acc; - if (acc && tmp <= acc) - abort(); /* paranoia */ - acc = tmp; - - remain -= cc; - if (remain) - return acc; /* short */ - } else - return acc; /* short out */ - xtvec.xtv_off += cc; - xtvec.xtv_len -= cc; - } - } - return acc; -} - -ssize_t -_sysio_enumerate_iovec(const struct iovec *iov, size_t count, - _SYSIO_OFF_T off, - ssize_t limit, - ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *), - void *arg) -{ - ssize_t acc, cc; - size_t n; - unsigned indx; - size_t remain; - - if (!count) - return -EINVAL; - assert(limit >= 0); - acc = 0; - n = limit; - for (indx = 0; n && indx < count; indx++) { - if (iov[indx].iov_len < n) { - cc = (ssize_t )iov[indx].iov_len; - if (cc < 0) - return -EINVAL; - } else - cc = (ssize_t )n; - if (!cc) - continue; - n -= cc; - cc += acc; - if (acc && cc <= acc) - return -EINVAL; - acc = cc; - } - if (!acc) - return 0; - acc = 0; - do { - if (!iov->iov_len) { - iov++; - continue; - } - n = - iov->iov_len < (size_t )limit - ? 
iov->iov_len - : (size_t )limit; - cc = (*f)(iov->iov_base, n, off, arg); - if (cc <= 0) { - if (acc) - return acc; - return cc; - } - off += cc; - limit -= cc; - remain = iov->iov_len - cc; - cc += acc; - if (acc && cc <= acc) - abort(); /* bad driver! */ - acc = cc; - if (remain || !limit) - break; /* short/limited read */ - iov++; - } while (--count); - return acc; -} - -static ssize_t -_sysio_doio_helper(const struct iovec *iov, int count, - _SYSIO_OFF_T off, - ssize_t limit, - struct doio_helper_args *args) -{ - - return _sysio_enumerate_iovec(iov, count, - off, limit, - args->f, - args->arg); -} - -/* - * A meta-driver for the whole strided-io process. Appropriate when - * the driver can't handle anything but simple p{read,write}-like - * interface. - */ -ssize_t -_sysio_doio(const struct intnl_xtvec *xtv, size_t xtvlen, - const struct iovec *iov, size_t iovlen, - ssize_t (*f)(void *, size_t, _SYSIO_OFF_T, void *), - void *arg) -{ - struct doio_helper_args arguments; - - arguments.f = f; - arguments.arg = arg; - return _sysio_enumerate_extents(xtv, xtvlen, - iov, iovlen, - (ssize_t (*)(const struct iovec *, int, - _SYSIO_OFF_T, - ssize_t, - void *))_sysio_doio_helper, - &arguments); -} diff --git a/libsysio/src/rename.c b/libsysio/src/rename.c deleted file mode 100644 index c0f496718ba916bb833963a28ba5734c26607173..0000000000000000000000000000000000000000 --- a/libsysio/src/rename.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. 
- * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "mount.h" -#include "inode.h" - -int -SYSIO_INTERFACE_NAME(rename)(const char *oldpath, const char *newpath) -{ - struct intent intent; - int err; - struct pnode *old, *new; - struct pnode_base *nxtpb, *pb; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - - /* - * Neither old nor new may be the empty string. - */ - if (*oldpath == '\0' || *newpath == '\0') - SYSIO_INTERFACE_RETURN(-1, -ENOENT); - - /* - * Resolve oldpath to a path node. 
- */ - INTENT_INIT(&intent, INT_UPDPARENT, NULL, NULL); - err = _sysio_namei(_sysio_cwd, oldpath, ND_NOFOLLOW, &intent, &old); - if (err) - goto error3; - /* - * Resolve newpath to a path node. - */ - INTENT_INIT(&intent, INT_UPDPARENT, NULL, NULL); - err = - _sysio_namei(_sysio_cwd, - newpath, - ND_NOFOLLOW | ND_NEGOK, - &intent, - &new); - if (err) - goto error2; - - /* - * Don't allow mount points to move. - */ - if (old->p_mount->mnt_root == old || old->p_cover || - new->p_mount->mnt_root == new) { - err = -EBUSY; - goto error1; - } - - /* - * No xdev renames either. - */ - if (old->p_mount->mnt_fs != new->p_mount->mnt_fs) { - err = -EXDEV; - goto error1; - } - - /* - * Make sure the old pnode can't be found in the ancestor chain - * for the new. If it can, they are trying to move into a subdirectory - * of the old. - */ - nxtpb = new->p_base; - do { - pb = nxtpb; - nxtpb = pb->pb_parent; - if (pb == old->p_base) { - err = -EINVAL; - goto error1; - } - } while (nxtpb); - - /* - * If old == new, we're done. - */ - if (old->p_base->pb_ino == new->p_base->pb_ino) - goto short_out; - - if (new->p_base->pb_ino) { - /* - * Existing entry. We're replacing the new. Make sure that's - * ok. - */ - if (S_ISDIR(new->p_base->pb_ino->i_stbuf.st_mode)) { - if (!S_ISDIR(old->p_base->pb_ino->i_stbuf.st_mode)) { - err = -EISDIR; - goto error1; - } - if (new->p_base->pb_ino->i_stbuf.st_nlink > 2) { - err = -ENOTEMPTY; - goto error1; - } - } else if (S_ISDIR(old->p_base->pb_ino->i_stbuf.st_mode)) { - err = -ENOTDIR; - goto error1; - } - } - - /* - * It's not impossible to clean up the altered name space after - * a rename. However, it is onerous and I don't want to do it right - * now. If it becomes an issue, we can do it later. For now, I've - * elected to use the semantic that says, basically, the entire - * sub-tree must be unreferenced. That's per POSIX, but it's a nasty - * thing to do to the caller. 
- */ - if (_sysio_p_prune(new) != 1) { - err = -EBUSY; - goto error1; - } - /* - * Use the parent node operations to request the task in case the - * driver is implemented using differentiated inode operations based - * on file type, such as incore does. - */ - err = old->p_parent->p_base->pb_ino->i_ops.inop_rename(old, new); - if (err) - goto error1; - /* - * Reflect the successful rename in the active name space graph. - */ - if (new->p_base->pb_ino) - I_GONE(new->p_base->pb_ino); - new->p_base->pb_ino = old->p_base->pb_ino; - old->p_base->pb_ino = NULL; - I_REF(new->p_base->pb_ino); - -short_out: -error1: - P_RELE(new); -error2: - P_RELE(old); -error3: - if (err) - goto out; -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} diff --git a/libsysio/src/rmdir.c b/libsysio/src/rmdir.c deleted file mode 100644 index bf13fa29c955dac7ef8e70be2c64ba81ea0dba32..0000000000000000000000000000000000000000 --- a/libsysio/src/rmdir.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "fs.h" -#include "mount.h" -#include "sysio-symbols.h" - -int -SYSIO_INTERFACE_NAME(rmdir)(const char *path) -{ - struct intent intent; - int err; - struct pnode *pno; - struct inode *ino; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - INTENT_INIT(&intent, INT_UPDPARENT, NULL, NULL); - err = _sysio_namei(_sysio_cwd, path, 0, &intent, &pno); - if (err) - goto out; - if (!S_ISDIR(pno->p_base->pb_ino->i_stbuf.st_mode)) { - err = -ENOTDIR; - goto error; - } - err = _sysio_permitted(pno->p_parent, W_OK); - if (err) - goto error; - if (pno->p_ref > 1) { - err = -EBUSY; - goto error; - } - /* - * Use the parent node operations to request the task in case the - * driver is implemented using differentiated inode operations based - * on file type, such as incore does. - */ - err = (*pno->p_parent->p_base->pb_ino->i_ops.inop_rmdir)(pno); - if (err) - goto error; - /* - * Invalidate the path-base node and kill the i-node. - */ - ino = pno->p_base->pb_ino; - pno->p_base->pb_ino = NULL; - I_GONE(ino); -error: - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef REDSTORM -#undef __rmdir -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(rmdir), - PREPEND(__, SYSIO_INTERFACE_NAME(rmdir))) -#endif diff --git a/libsysio/src/rw.c b/libsysio/src/rw.c deleted file mode 100644 index 300f073f9583f29fb7a2229b8747e7926d79ef26..0000000000000000000000000000000000000000 --- a/libsysio/src/rw.c +++ /dev/null @@ -1,1336 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/uio.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "xtio.h" -#include "file.h" -#include "inode.h" - -#include "sysio-symbols.h" - -#define IIOXOP_READ(ino) (ino)->i_ops.inop_read, 0 -#define IIOXOP_WRITE(ino) (ino)->i_ops.inop_write, 1 - -/* - * Decoding the interface routine names: - * - * Much of this carries legacy from the POSIX world and the Intel ASCI - * Red programming environment. Routine names are composed of prefix, - * basic POSIX names, and postfix. The basic POSIX names are read and write. - * Prefixes, left-to-right: - * - * - 'i' -- asynchronous operation (from ASCI Red) - * - 'p' -- positional (POSIX) - * Posfixes, only one: - * - 'v' -- vectored (POSIX) - * - 'x' -- extent-based (new for Red Storm) - * - * All valid combinations are available and symmetric. - */ - -/* - * Post op using iovec with regions specified by the passed extent vector. - * - * NOTE: There are enough parameters that we should really consider - * passing them in a structure. - */ -static int -_sysio_iiox(int (*f)(struct inode *, struct ioctx *), - int wr, - struct file *fil, - const struct iovec *iov, - size_t iov_count, - void (*iov_free)(struct ioctx *), - const struct intnl_xtvec *xtv, - size_t xtv_count, - void (*xtv_free)(struct ioctx *), - void (*completio)(struct ioctx *, void *), - struct ioctx **ioctxp) -{ - struct inode *ino; - ssize_t cc; - struct ioctx *ioctx; - int err; - struct ioctx_callback *cb; - - /* - * Check that it was opened with flags supporting the operation. - */ - if (!F_CHKRW(fil, wr ? 'w' : 'r')) - return -EBADF; - - ino = fil->f_ino; - if (!ino) { - /* - * Huh? It's dead. 
- */ - return -EBADF; - } - cc = - _sysio_validx(xtv, xtv_count, - iov, iov_count, -#if defined(_LARGEFILE64_SOURCE) && defined(O_LARGEFILE) - (fil->f_flags & O_LARGEFILE) == 0 - ? LONG_MAX - : -#endif - _SYSIO_OFF_T_MAX); - if (cc < 0) - return cc; - ioctx = _sysio_ioctx_new(ino, wr, iov, iov_count, xtv, xtv_count); - if (!ioctx) - return -ENOMEM; - if ((iov_free && - (err = _sysio_ioctx_cb(ioctx, - (void (*)(struct ioctx *, - void *))iov_free, - NULL))) || - (xtv_free && - (err = _sysio_ioctx_cb(ioctx, - (void (*)(struct ioctx *, - void *))xtv_free, - NULL))) || - (completio && - (err = _sysio_ioctx_cb(ioctx, - (void (*)(struct ioctx *, - void *))completio, - fil))) || - (err = (*f)(ino, ioctx))) { - /* - * Release the callback queue. Don't want it run after all. - */ - while ((cb = ioctx->ioctx_cbq.tqh_first)) { - TAILQ_REMOVE(&ioctx->ioctx_cbq, - cb, - iocb_next); - _sysio_ioctx_cb_free(cb); - } - _sysio_ioctx_complete(ioctx); - return err; - } - *ioctxp = ioctx; - return 0; -} - -/* - * Sum iovec entries, returning total found or error if range of ssize_t would - * be exceeded. - */ -static ssize_t -_sysio_sum_iovec(const struct iovec *iov, int count) -{ - ssize_t tmp, cc; - - if (count <= 0) - return -EINVAL; - - cc = 0; - while (count--) { - tmp = cc; - cc += iov->iov_len; - if (tmp && iov->iov_len && cc <= tmp) - return -EINVAL; - iov++; - } - return cc; -} - -/* - * Asynch IO from/to iovec from/to current file offset. - */ -static int -_sysio_iiov(int (*f)(struct inode *, struct ioctx *), - int wr, - struct file *fil, - const struct iovec *iov, - int count, - void (*iov_free)(struct ioctx *), - struct intnl_xtvec *xtv, - void (*xtv_free)(struct ioctx *), - struct ioctx **ioctxp) -{ - ssize_t cc; - _SYSIO_OFF_T off; - int err; - - cc = _sysio_sum_iovec(iov, count); - if (cc < 0) - return (int )cc; - xtv->xtv_off = fil->f_pos; - xtv->xtv_len = cc; - off = xtv->xtv_off + xtv->xtv_len; - if (xtv->xtv_off && off <= xtv->xtv_off) { - /* - * Ouch! 
The IO vector specifies more bytes than - * are addressable. Trim the region to limit how - * much of the IO vector is finally transferred. - */ - xtv->xtv_len = _SYSIO_OFF_T_MAX - xtv->xtv_off; - } - err = - _sysio_iiox(f, - wr, - fil, - iov, count, iov_free, - xtv, 1, xtv_free, - (void (*)(struct ioctx *, void *))_sysio_fcompletio, - ioctxp); - if (err) - return err; - return 0; -} - -static void -free_xtv(struct ioctx *ioctx) -{ - - free((struct iovec *)ioctx->ioctx_xtv); - ioctx->ioctx_iov = NULL; -} - -ioid_t -SYSIO_INTERFACE_NAME(ireadv)(int fd, const struct iovec *iov, int count) -{ - struct file *fil; - struct intnl_xtvec *xtv; - struct ioctx *ioctx; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - xtv = malloc(sizeof(struct intnl_xtvec)); - if (!xtv) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - - err = - _sysio_iiov(IIOXOP_READ(fil->f_ino), - fil, - iov, count, NULL, - xtv, free_xtv, - &ioctx); - if (err) { - free(xtv); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} - -ssize_t -SYSIO_INTERFACE_NAME(readv)(int fd, const struct iovec *iov, int count) -{ - struct file *fil; - struct intnl_xtvec xtvector; - struct ioctx *ioctx; - int err; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - - err = - _sysio_iiov(IIOXOP_READ(fil->f_ino), - fil, - iov, count, NULL, - &xtvector, NULL, - &ioctx); - if (!err && (cc = _sysio_ioctx_wait(ioctx)) < 0) - err = (int )cc; - - SYSIO_INTERFACE_RETURN(err ? 
-1 : cc, err); -} - -#if defined(__GLIBC__) -#undef __readv -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(readv), - PREPEND(__, SYSIO_INTERFACE_NAME(readv))) -#undef __libc_readv -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(readv), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_readv))) -#endif - -static void -free_iov(struct ioctx *ioctx) -{ - - free((struct iovec *)ioctx->ioctx_iov); - ioctx->ioctx_iov = NULL; -} - -ioid_t -SYSIO_INTERFACE_NAME(iread)(int fd, void *buf, size_t count) -{ - struct iovec *iov; - struct file *fil; - struct intnl_xtvec *xtv; - struct ioctx *ioctx; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - iov = malloc(sizeof(struct iovec)); - if (!iov) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - - iov->iov_base = buf; - iov->iov_len = count; - xtv = malloc(sizeof(struct intnl_xtvec)); - if (!xtv) { - free(iov); - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - } - err = - _sysio_iiov(IIOXOP_READ(fil->f_ino), - fil, - iov, 1, free_iov, - xtv, free_xtv, - &ioctx); - if (err) { - free(xtv); - free(iov); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} - -ssize_t -SYSIO_INTERFACE_NAME(read)(int fd, void *buf, size_t count) -{ - struct file *fil; - struct iovec iovector; - struct intnl_xtvec xtvector; - int err; - struct ioctx *ioctx; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - - iovector.iov_base = buf; - iovector.iov_len = count; - err = - _sysio_iiov(IIOXOP_READ(fil->f_ino), - fil, - &iovector, 1, NULL, - &xtvector, NULL, - &ioctx); - if (!err && (cc = _sysio_ioctx_wait(ioctx)) < 0) - err = (int )cc; - SYSIO_INTERFACE_RETURN(err ? 
-1 : cc, err); -} - -#ifdef __GLIBC__ -#undef __read -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(read), - PREPEND(__, SYSIO_INTERFACE_NAME(read))) -#undef __libc_read -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(read), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_read))) -#endif - -/* - * Asynch IO between iovec and data at the given offset. - */ -static int -_sysio_ipiov(int (*f)(struct inode *, struct ioctx *), - int wr, - struct file *fil, - const struct iovec *iov, - int count, - void (*iov_free)(struct ioctx *), - _SYSIO_OFF_T off, - struct intnl_xtvec *xtv, - void (*xtv_free)(struct ioctx *), - struct ioctx **ioctxp) -{ - ssize_t cc; - int err; - - SYSIO_ENTER; - cc = _sysio_sum_iovec(iov, count); - if (cc < 0) { - SYSIO_LEAVE; - return (int )cc; - } - xtv->xtv_off = off, - xtv->xtv_len = cc; - err = - _sysio_iiox(f, - wr, - fil, - iov, count, iov_free, - xtv, 1, xtv_free, - NULL, - ioctxp); - SYSIO_LEAVE; - if (err) - return err; - return 0; -} - -static ioid_t -PREPEND(_, SYSIO_INTERFACE_NAME(ipreadv))(int fd, - const struct iovec *iov, - size_t count, - _SYSIO_OFF_T offset) -{ - struct file *fil; - struct intnl_xtvec *xtv; - struct ioctx *ioctx; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - xtv = malloc(sizeof(struct intnl_xtvec)); - if (!xtv) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - - err = - _sysio_ipiov(IIOXOP_READ(fil->f_ino), - fil, - iov, count, NULL, - offset, - xtv, free_xtv, - &ioctx); - if (err) { - free(xtv); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} - -#ifdef _LARGEFILE64_SOURCE -#undef ipread64v -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(ipreadv)), - SYSIO_INTERFACE_NAME(ipread64v)) -#endif - -ioid_t -SYSIO_INTERFACE_NAME(ipreadv)(int fd, - const struct iovec *iov, - size_t count, - off_t offset) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(ipreadv))(fd, - iov, - count, - 
offset); -} - -static ssize_t -PREPEND(_, SYSIO_INTERFACE_NAME(preadv))(int fd, - const struct iovec *iov, - size_t count, - _SYSIO_OFF_T offset) -{ - struct file *fil; - struct intnl_xtvec xtvector; - struct ioctx *ioctx; - int err; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - - err = - _sysio_ipiov(IIOXOP_READ(fil->f_ino), - fil, - iov, count, NULL, - offset, - &xtvector, NULL, - &ioctx); - if (!err && (cc = _sysio_ioctx_wait(ioctx)) < 0) - err = (int )cc; - - SYSIO_INTERFACE_RETURN(err ? -1 : cc, err); -} - -#ifdef _LARGEFILE64_SOURCE -#undef pread64v -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(preadv)), - SYSIO_INTERFACE_NAME(pread64v)) -#endif - -ssize_t -SYSIO_INTERFACE_NAME(preadv)(int fd, - const struct iovec *iov, - size_t count, - off_t offset) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(preadv))(fd, - iov, - count, - offset); -} - -static ioid_t -PREPEND(_, SYSIO_INTERFACE_NAME(ipread))(int fd, - void *buf, - size_t count, - _SYSIO_OFF_T offset) -{ - struct file *fil; - struct intnl_xtvec *xtv; - struct iovec *iov; - struct ioctx *ioctx; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - xtv = malloc(sizeof(struct intnl_xtvec)); - iov = malloc(sizeof(struct iovec)); - if (!(xtv && iov)) { - err = -ENOMEM; - goto error; - } - xtv->xtv_off = offset; - iov->iov_base = buf; - xtv->xtv_len = iov->iov_len = count; - err = - _sysio_ipiov(IIOXOP_READ(fil->f_ino), - fil, - iov, 1, free_iov, - offset, - xtv, free_xtv, - &ioctx); -error: - if (err) { - if (iov) - free(iov); - if (xtv) - free(xtv); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} - -#ifdef _LARGEFILE64_SOURCE -#undef ipread64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(ipread)), - SYSIO_INTERFACE_NAME(ipread64)) -#endif - 
-ioid_t -SYSIO_INTERFACE_NAME(ipread)(int fd, - void *buf, - size_t count, - off_t offset) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(ipread))(fd, - buf, - count, - offset); -} - -ssize_t -PREPEND(_, SYSIO_INTERFACE_NAME(pread))(int fd, - void *buf, - size_t count, - _SYSIO_OFF_T offset) -{ - struct file *fil; - struct intnl_xtvec xtvec; - struct iovec iovec; - struct ioctx *ioctx; - int err; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - xtvec.xtv_off = offset; - iovec.iov_base = buf; - xtvec.xtv_len = iovec.iov_len = count; - err = - _sysio_ipiov(IIOXOP_READ(fil->f_ino), - fil, - &iovec, 1, NULL, - offset, - &xtvec, NULL, - &ioctx); - if (!err && (cc = _sysio_ioctx_wait(ioctx)) < 0) - err = (int )cc; - - SYSIO_INTERFACE_RETURN(err ? -1 : cc, err); -} - -#ifdef _LARGEFILE64_SOURCE -#undef pread64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(pread)), - SYSIO_INTERFACE_NAME(pread64)) -#if __GLIBC__ -#undef __pread64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(pread)), - PREPEND(__, SYSIO_INTERFACE_NAME(pread64))) -#undef __libc_pread64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(pread)), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_pread64))) -#endif -#endif - -ssize_t -SYSIO_INTERFACE_NAME(pread)(int fd, void *buf, size_t count, off_t offset) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(pread))(fd, - buf, - count, - offset); -} - -#if __GLIBC__ -#undef __pread -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(pread), - PREPEND(__, SYSIO_INTERFACE_NAME(pread))) -#undef __libc_pread -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(pread), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_pread))) -#endif - -static ioid_t -PREPEND(_, SYSIO_INTERFACE_NAME(ireadx))(int fd, - const struct iovec *iov, - size_t iov_count, - const struct intnl_xtvec *xtv, - size_t xtv_count) -{ - struct file *fil; - int err; - struct ioctx *ioctx; - 
SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - /* Perform a check on the iov_count and xtv_count */ - if ((iov_count == 0) || (xtv_count == 0)) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EINVAL); - - err = - _sysio_iiox(IIOXOP_READ(fil->f_ino), - fil, - iov, iov_count, NULL, - xtv, xtv_count, NULL, - NULL, - &ioctx); - - SYSIO_INTERFACE_RETURN(err ? IOID_FAIL : ioctx, err); -} - -#ifdef _LARGEFILE64_SOURCE -#undef iread64x -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(ireadx)), - SYSIO_INTERFACE_NAME(iread64x)) -#endif - -#ifdef _LARGEFILE64_SOURCE -ioid_t -SYSIO_INTERFACE_NAME(ireadx)(int fd, - const struct iovec *iov, size_t iov_count, - const struct xtvec *xtv, size_t xtv_count) -{ - struct file *fil; - struct intnl_xtvec *ixtv, *ixtvent; - size_t count; - int err; - struct ioctx *ioctx; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - - /* Perform a check on the iov_count and xtv_count */ - if ((iov_count == 0) || (xtv_count == 0)) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EINVAL); - - ixtv = ixtvent = malloc(xtv_count * sizeof(struct intnl_xtvec)); - if (!ixtv) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - - count = xtv_count; - while (count--) { - ixtvent->xtv_off = xtv->xtv_off; - ixtvent->xtv_len = xtv->xtv_len; - ixtvent++; - xtv++; - } - - err = - _sysio_iiox(IIOXOP_READ(fil->f_ino), - fil, - iov, iov_count, NULL, - ixtv, xtv_count, free_xtv, - NULL, - &ioctx); - if (err) { - free(ixtv); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} -#else -#undef ireadx -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(ireadx)), - SYSIO_INTERFACE_NAME(ireadx)) -#endif - -ssize_t -SYSIO_INTERFACE_NAME(readx)(int fd, - const struct iovec *iov, size_t iov_count, - const struct xtvec *xtv, size_t xtv_count) -{ - ioid_t ioid; - - if 
((ioid = SYSIO_INTERFACE_NAME(ireadx)(fd, - iov, - iov_count, - xtv, - xtv_count)) == IOID_FAIL) - return -1; - return SYSIO_INTERFACE_NAME(iowait)(ioid); -} - -#ifdef _LARGEFILE64_SOURCE -#undef iread64x -ssize_t -SYSIO_INTERFACE_NAME(read64x)(int fd, - const struct iovec *iov, size_t iov_count, - const struct xtvec64 *xtv, size_t xtv_count) -{ - ioid_t ioid; - - if ((ioid = SYSIO_INTERFACE_NAME(iread64x)(fd, - iov, - iov_count, - xtv, - xtv_count)) == IOID_FAIL) - return -1; - return SYSIO_INTERFACE_NAME(iowait)(ioid); -} -#endif - -#ifdef notdef -int -read_list(int fd, - int mem_list_count, - char *mem_offsets[], - int mem_lengths[], - int file_list_count, - int64_t file_offsets[], - int32_t file_lengths[]) -{ - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - SYSIO_INTERFACE_RETURN(-1, -ENOSYS); -} -#endif - -ioid_t -SYSIO_INTERFACE_NAME(iwritev)(int fd, - const struct iovec *iov, - int count) -{ - struct file *fil; - struct intnl_xtvec *xtv; - struct ioctx *ioctx; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - xtv = malloc(sizeof(struct intnl_xtvec)); - if (!xtv) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - - err = - _sysio_iiov(IIOXOP_WRITE(fil->f_ino), - fil, - iov, count, NULL, - xtv, free_xtv, - &ioctx); - if (err) { - free(xtv); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} - -ssize_t -SYSIO_INTERFACE_NAME(writev)(int fd, const struct iovec *iov, int count) -{ - struct file *fil; - struct intnl_xtvec xtvector; - struct ioctx *ioctx; - int err; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - - err = - _sysio_iiov(IIOXOP_WRITE(fil->f_ino), - fil, - iov, count, NULL, - &xtvector, NULL, - &ioctx); - if (!err && (cc = _sysio_ioctx_wait(ioctx)) < 0) - err = (int )cc; - - 
SYSIO_INTERFACE_RETURN(err < 0 ? -1 : cc, err); -} - -#ifdef __GLIBC__ -#undef __writev -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(writev), - PREPEND(__, SYSIO_INTERFACE_NAME(writev))) -#undef __libc_writev -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(writev), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_writev))) -#endif - -ioid_t -SYSIO_INTERFACE_NAME(iwrite)(int fd, const void *buf, size_t count) -{ - struct iovec *iov; - struct file *fil; - struct intnl_xtvec *xtv; - struct ioctx *ioctx; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - iov = malloc(sizeof(struct iovec)); - if (!iov) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - - iov->iov_base = (void *)buf; - iov->iov_len = count; - xtv = malloc(sizeof(struct intnl_xtvec)); - if (!xtv) { - free(iov); - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - } - err = - _sysio_iiov(IIOXOP_WRITE(fil->f_ino), - fil, - iov, 1, free_iov, - xtv, free_xtv, - &ioctx); - if (err) { - free(xtv); - free(iov); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} - -ssize_t -SYSIO_INTERFACE_NAME(write)(int fd, const void *buf, size_t count) -{ - struct file *fil; - struct iovec iovector; - struct intnl_xtvec xtvector; - int err; - struct ioctx *ioctx; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - - iovector.iov_base = (void *)buf; - iovector.iov_len = count; - err = - _sysio_iiov(IIOXOP_WRITE(fil->f_ino), - fil, - &iovector, 1, NULL, - &xtvector, NULL, - &ioctx); - if (!err && (cc = _sysio_ioctx_wait(ioctx)) < 0) - err = (int )cc; - - SYSIO_INTERFACE_RETURN(err < 0 ? 
-1 : cc, err); -} - -#ifdef __GLIBC__ -#undef __write -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(write), - PREPEND(__, SYSIO_INTERFACE_NAME(write))) -#undef __libc_write -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(write), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_write))) -#endif - -static ioid_t -PREPEND(_, SYSIO_INTERFACE_NAME(ipwritev))(int fd, - const struct iovec *iov, - size_t count, - _SYSIO_OFF_T offset) -{ - struct file *fil; - struct intnl_xtvec *xtv; - struct ioctx *ioctx; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - xtv = malloc(sizeof(struct intnl_xtvec)); - if (!xtv) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - - err = - _sysio_ipiov(IIOXOP_WRITE(fil->f_ino), - fil, - iov, count, NULL, - offset, - xtv, free_xtv, - &ioctx); - if (err) { - free(xtv); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} - -#ifdef _LARGEFILE64_SOURCE -#undef ipwrite64v -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(ipwritev)), - SYSIO_INTERFACE_NAME(ipwrite64v)) -#endif - -ioid_t -SYSIO_INTERFACE_NAME(ipwritev)(int fd, - const struct iovec *iov, - size_t count, - off_t offset) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(ipwritev))(fd, - iov, - count, - offset); -} - -static ssize_t -PREPEND(_, SYSIO_INTERFACE_NAME(pwritev))(int fd, - const struct iovec *iov, - size_t count, - _SYSIO_OFF_T offset) -{ - struct file *fil; - struct intnl_xtvec xtvector; - struct ioctx *ioctx; - int err; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - - err = - _sysio_ipiov(IIOXOP_WRITE(fil->f_ino), - fil, - iov, count, NULL, - offset, - &xtvector, NULL, - &ioctx); - if (!err && (cc = _sysio_ioctx_wait(ioctx)) < 0) - err = (int )cc; - - SYSIO_INTERFACE_RETURN(err ? 
-1 : cc, err); -} - -#ifdef _LARGEFILE64_SOURCE -#undef pwrite64v -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(pwritev)), - SYSIO_INTERFACE_NAME(pwrite64v)) -#endif - -ssize_t -SYSIO_INTERFACE_NAME(pwritev)(int fd, - const struct iovec *iov, - size_t count, - off_t offset) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(pwritev))(fd, - iov, - count, - offset); -} - -static ioid_t -PREPEND(_, SYSIO_INTERFACE_NAME(ipwrite))(int fd, - const void *buf, - size_t count, - _SYSIO_OFF_T offset) -{ - struct file *fil; - struct intnl_xtvec *xtv; - struct iovec *iov; - struct ioctx *ioctx; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - xtv = malloc(sizeof(struct intnl_xtvec)); - iov = malloc(sizeof(struct iovec)); - if (!(xtv && iov)) { - err = -errno; - goto error; - } - xtv->xtv_off = offset; - iov->iov_base = (void *)buf; - xtv->xtv_len = iov->iov_len = count; - err = - _sysio_ipiov(IIOXOP_WRITE(fil->f_ino), - fil, - iov, 1, free_iov, - offset, - xtv, free_xtv, - &ioctx); -error: - if (err) { - if (iov) - free(iov); - if (xtv) - free(xtv); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} - -#ifdef _LARGEFILE64_SOURCE -#undef ipwrite64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(ipwrite)), - SYSIO_INTERFACE_NAME(ipwrite64)) -#endif - -ioid_t -SYSIO_INTERFACE_NAME(ipwrite)(int fd, - const void *buf, - size_t count, - off_t offset) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(ipwrite))(fd, - buf, - count, - offset); -} - -ssize_t -PREPEND(_, SYSIO_INTERFACE_NAME(pwrite))(int fd, - const void *buf, - size_t count, - _SYSIO_OFF_T offset) -{ - struct file *fil; - struct intnl_xtvec xtvec; - struct iovec iovec; - struct ioctx *ioctx; - int err; - ssize_t cc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(-1, -EBADF); - - xtvec.xtv_off = 
offset; - iovec.iov_base = (void *)buf; - xtvec.xtv_len = iovec.iov_len = count; - err = - _sysio_ipiov(IIOXOP_WRITE(fil->f_ino), - fil, - &iovec, 1, NULL, - offset, - &xtvec, NULL, - &ioctx); - if (!err && (cc = _sysio_ioctx_wait(ioctx)) < 0) - err = (int )cc; - - SYSIO_INTERFACE_RETURN(err ? -1 : cc, err); -} - -#ifdef _LARGEFILE64_SOURCE -#undef pwrite64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(pwrite)), - SYSIO_INTERFACE_NAME(pwrite64)) -#ifdef __GLIBC -#undef __pwrite64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(pwrite)), - PREPEND(__, SYSIO_INTERFACE_NAME(pwrite64))) -#undef __libc_pwrite64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(pwrite)), - PREPEND(__, SYSIO_INTERFACE_NAME(libc_pwrite64))) -#endif -#endif - -ssize_t -SYSIO_INTERFACE_NAME(pwrite)(int fd, - const void *buf, - size_t count, - off_t offset) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(pwrite))(fd, - buf, - count, - offset); -} - -#ifdef __GLIBC -#undef __libc_pwrite -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(pwrite), __libc_pwrite) - PREPEND(__, SYSIO_INTERFACE_NAME(libc_pwrite))) -#endif - -static ioid_t -PREPEND(_, SYSIO_INTERFACE_NAME(iwritex))(int fd, - const struct iovec *iov, - size_t iov_count, - const struct intnl_xtvec *xtv, - size_t xtv_count) -{ - struct file *fil; - int err; - struct ioctx *ioctx; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!(fil && xtv_count)) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - err = - _sysio_iiox(IIOXOP_WRITE(fil->f_ino), - fil, - iov, iov_count, NULL, - xtv, xtv_count, NULL, - NULL, - &ioctx); - - SYSIO_INTERFACE_RETURN(err ? 
IOID_FAIL : ioctx, err); -} - -#ifdef _LARGEFILE64_SOURCE -#undef iwrite64x -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(iwritex)), - SYSIO_INTERFACE_NAME(iwrite64x)) -#endif - -#ifdef _LARGEFILE64_SOURCE -ioid_t -SYSIO_INTERFACE_NAME(iwritex)(int fd, - const struct iovec *iov, size_t iov_count, - const struct xtvec *xtv, size_t xtv_count) -{ - struct file *fil; - struct intnl_xtvec *ixtv, *ixtvent; - size_t count; - int err; - struct ioctx *ioctx; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - fil = _sysio_fd_find(fd); - if (!fil) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EBADF); - - /* Perform a check on the iov_count and xtv_count */ - if ((iov_count == 0) || (xtv_count == 0)) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -EINVAL); - - ixtv = ixtvent = malloc(xtv_count * sizeof(struct intnl_xtvec)); - if (!ixtv) - SYSIO_INTERFACE_RETURN(IOID_FAIL, -ENOMEM); - - count = xtv_count; - while (count--) { - ixtvent->xtv_off = xtv->xtv_off; - ixtvent->xtv_len = xtv->xtv_len; - ixtvent++; - xtv++; - } - - err = - _sysio_iiox(IIOXOP_WRITE(fil->f_ino), - fil, - iov, iov_count, NULL, - ixtv, xtv_count, free_xtv, - NULL, - &ioctx); - if (err) { - free(ixtv); - SYSIO_INTERFACE_RETURN(IOID_FAIL, err); - } - SYSIO_INTERFACE_RETURN(ioctx, 0); -} -#else -#undef iwritex -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(iwritex)), - SYSIO_INTERFACE_NAME(iwritex)) -#endif - -#undef writex -ssize_t -SYSIO_INTERFACE_NAME(writex)(int fd, - const struct iovec *iov, size_t iov_count, - const struct xtvec *xtv, size_t xtv_count) -{ - ioid_t ioid; - - if ((ioid = - SYSIO_INTERFACE_NAME(iwritex)(fd, - iov, - iov_count, - xtv, - xtv_count)) == IOID_FAIL) - return -1; - return SYSIO_INTERFACE_NAME(iowait)(ioid); -} - -#ifdef _LARGEFILE64_SOURCE -#undef write64x -ssize_t -SYSIO_INTERFACE_NAME(write64x)(int fd, - const struct iovec *iov, size_t iov_count, - const struct xtvec64 *xtv, size_t xtv_count) -{ - ioid_t ioid; - - if ((ioid = SYSIO_INTERFACE_NAME(iwrite64x)(fd, - iov, - 
iov_count, - xtv, - xtv_count)) == IOID_FAIL) - return -1; - return SYSIO_INTERFACE_NAME(iowait)(ioid); -} -#endif - -#ifdef notdef -int -write_list(int fd, - int mem_list_count, - char *mem_offsets[], - int mem_lengths[], - int file_list_count, - int64_t file_offsets[], - int32_t file_lengths[]) -{ - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - SYSIO_INTERFACE_RETURN(-1, -ENOSYS); -} -#endif diff --git a/libsysio/src/stat.c b/libsysio/src/stat.c deleted file mode 100644 index 607924a19ab81a44018335c7fb25da4975ae31ca..0000000000000000000000000000000000000000 --- a/libsysio/src/stat.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" - -#include "sysio-symbols.h" - -#ifndef REDSTORM -#undef fstat -#undef stat -#undef lstat -#endif - -#undef __fxstat -#undef __xstat -#undef __lxstat - -#if !defined(_STAT_VER) -#define _STAT_VER 0 -#endif - -#ifdef _LARGEFILE64_SOURCE -static void -convstat(struct stat64 *st64_buf, struct stat *st_buf) -{ - - st_buf->st_dev = st64_buf->st_dev; - st_buf->st_ino = st64_buf->st_ino; - st_buf->st_mode = st64_buf->st_mode; - st_buf->st_nlink = st64_buf->st_nlink; - st_buf->st_uid = st64_buf->st_uid; - st_buf->st_gid = st64_buf->st_gid; - st_buf->st_rdev = st64_buf->st_rdev; - st_buf->st_size = st64_buf->st_size; - st_buf->st_blksize = st64_buf->st_blksize; - st_buf->st_blocks = st64_buf->st_blocks; - st_buf->st_atime = st64_buf->st_atime; - st_buf->st_mtime = st64_buf->st_mtime; - st_buf->st_ctime = st64_buf->st_ctime; -} -#endif - -int -PREPEND(__, SYSIO_INTERFACE_NAME(fxstat))(int __ver, - int __fildes, - struct stat *__stat_buf) -{ - struct file *fil; - int err; - struct intnl_stat *buf; -#ifdef _LARGEFILE64_SOURCE - struct stat64 st64; -#endif - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - if (__ver != _STAT_VER) { - err = -ENOSYS; - goto out; - } - - err = 0; - fil = _sysio_fd_find(__fildes); - if (!fil) { - err = -EBADF; - goto out; - } -#ifdef _LARGEFILE64_SOURCE - buf = &st64; -#else - buf = __stat_buf; -#endif - /* - * Never use the 
attributes cached in the inode record. Give the - * driver a chance to refresh them. - */ - err = - fil->f_ino->i_ops.inop_getattr(NULL, fil->f_ino, buf); -#ifdef _LARGEFILE64_SOURCE - if (!err) - convstat(buf, __stat_buf); -#endif -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef _fxstat -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(fxstat)), - PREPEND(_, SYSIO_INTERFACE_NAME(fxstat))) -#endif - -#ifndef REDSTORM -static int -PREPEND(__, SYSIO_INTERFACE_NAME(fstat))(int fd, struct stat *buf) -{ - - return PREPEND(__, SYSIO_INTERFACE_NAME(fxstat))(_STAT_VER, - fd, - buf); -} - -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(fstat)), - SYSIO_INTERFACE_NAME(fstat)) - -#ifdef BSD -#undef _fstat -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(fstat)), - PREPEND(_, SYSIO_INTERFACE_NAME(fstat))) -#endif -#endif - -int -PREPEND(__, SYSIO_INTERFACE_NAME(xstat))(int __ver, - const char *__filename, - struct stat *__stat_buf) -{ - struct intent intent; - int err; - struct pnode *pno; - struct inode *ino; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - if (__ver != _STAT_VER) { - err = -ENOSYS; - goto out; - } - - INTENT_INIT(&intent, INT_GETATTR, NULL, NULL); - err = _sysio_namei(_sysio_cwd, __filename, 0, &intent, &pno); - if (err) - goto out; - /* - * Leverage the INT_GETATTR intent above. We are counting - * on the FS driver to either make sure the attributes cached in - * the inode are always correct or refresh them in the lookup, above. - */ - ino = pno->p_base->pb_ino; -#ifdef _LARGEFILE64_SOURCE - convstat(&ino->i_stbuf, __stat_buf); -#else - (void )memcpy(__stat_buf, &ino->i_stbuf, sizeof(struct intnl_stat)); -#endif - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef REDSTORM -#undef _xstat -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(xstat)), - PREPEND(_, SYSIO_INTERFACE_NAME(xstat))) -#endif - -#ifndef REDSTORM -static int -PREPEND(__, SYSIO_INTERFACE_NAME(stat))(const char *filename, - struct stat *buf) -{ - - return PREPEND(__, SYSIO_INTERFACE_NAME(xstat))(_STAT_VER, - filename, - buf); -} - -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(stat)), - SYSIO_INTERFACE_NAME(stat)) - -#ifdef BSD -#undef _stat -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(stat)), - PREPEND(_, SYSIO_INTERFACE_NAME(stat))) -#endif -#endif - -int -PREPEND(__, SYSIO_INTERFACE_NAME(lxstat))(int __ver, - const char *__filename, - struct stat *__stat_buf) -{ - struct intent intent; - int err; - struct pnode *pno; - struct inode *ino; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - if (__ver != _STAT_VER) { - err = -ENOSYS; - goto out; - } - - INTENT_INIT(&intent, INT_GETATTR, NULL, NULL); - err = _sysio_namei(_sysio_cwd, __filename, ND_NOFOLLOW, &intent, &pno); - if (err) - goto out; - /* - * Leverage the INT_GETATTR intent above. We are counting - * on the FS driver to either make sure the attributes cached in - * the inode are always correct or refresh them in the lookup, above. - */ - ino = pno->p_base->pb_ino; -#ifdef _LARGEFILE64_SOURCE - convstat(&ino->i_stbuf, __stat_buf); -#else - (void )memcpy(__stat_buf, &ino->i_stbuf, sizeof(struct intnl_stat)); -#endif - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef REDSTORM -#undef _lxstat -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(lxstat)), - PREPEND(_, SYSIO_INTERFACE_NAME(lxstat))) -#endif - -#ifndef REDSTORM -static int -PREPEND(__, SYSIO_INTERFACE_NAME(lstat))(const char *filename, struct stat *buf) -{ - return PREPEND(__, SYSIO_INTERFACE_NAME(lxstat))(_STAT_VER, - filename, - buf); -} - -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(lstat)), - SYSIO_INTERFACE_NAME(lstat)) - -#ifdef BSD -#undef _lstat -sysio_sym_weak_alias(PREPEND(__, SYSIO_INTERFACE_NAME(lstat)), - PREPEND(_, SYSIO_INTERFACE_NAME(lstat))) -#endif -#endif diff --git a/libsysio/src/stat64.c b/libsysio/src/stat64.c deleted file mode 100644 index 70bd43adb220b2b2c0d205f0c6e5cd4c3dbfb7b0..0000000000000000000000000000000000000000 --- a/libsysio/src/stat64.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifdef _LARGEFILE64_SOURCE - -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" - -#ifndef REDSTORM -#undef fstat64 -#undef stat64 -#undef lstat64 -#endif - -#undef __fxstat64 -#undef __xstat64 -#undef __lxstat64 - -int -PREPEND(__, SYSIO_INTERFACE_NAME(fxstat64))(int __ver, - int __fildes, - struct stat64 *__stat_buf) -{ - struct file *fil; - int err; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - if (__ver != _STAT_VER) { - err = -ENOSYS; - goto out; - } - - err = 0; - fil = _sysio_fd_find(__fildes); - if (!fil) { - err = -EBADF; - goto out; - } - /* - * Never use the attributes cached in the inode record. Give - * the driver a chance to refresh them. - */ - err = fil->f_ino->i_ops.inop_getattr(NULL, fil->f_ino, __stat_buf); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifndef REDSTORM -int -SYSIO_INTERFACE_NAME(fstat64)(int fd, struct stat64 *buf) -{ - - return PREPEND(__, SYSIO_INTERFACE_NAME(fxstat64))(_STAT_VER, fd, buf); -} -#endif - -int -PREPEND(__, SYSIO_INTERFACE_NAME(xstat64))(int __ver, - const char *__filename, - struct stat64 *__stat_buf) -{ - struct intent intent; - int err; - struct pnode *pno; - struct inode *ino; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - if (__ver != _STAT_VER) { - err = -ENOSYS; - goto out; - } - - INTENT_INIT(&intent, INT_GETATTR, NULL, NULL); - err = _sysio_namei(_sysio_cwd, __filename, 0, &intent, &pno); - if (err) - goto out; - /* - * Leverage the INT_GETATTR intent above. We are counting - * on the FS driver to either make sure the attributes cached in - * the inode are always correct or refresh them in the lookup, above. - */ - ino = pno->p_base->pb_ino; - (void )memcpy(__stat_buf, &ino->i_stbuf, sizeof(struct intnl_stat)); - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifndef REDSTORM -int -SYSIO_INTERFACE_NAME(stat64)(const char *filename, struct stat64 *buf) -{ - - return PREPEND(__, SYSIO_INTERFACE_NAME(xstat64))(_STAT_VER, - filename, - buf); -} -#endif - -int -PREPEND(__, SYSIO_INTERFACE_NAME(lxstat64))(int __ver, - const char *__filename, - struct stat64 *__stat_buf) -{ - struct intent intent; - int err; - struct pnode *pno; - struct inode *ino; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - if (__ver != _STAT_VER) { - err = -ENOSYS; - goto out; - } - - INTENT_INIT(&intent, INT_GETATTR, NULL, NULL); - err = _sysio_namei(_sysio_cwd, __filename, ND_NOFOLLOW, &intent, &pno); - if (err) - goto out; - /* - * Leverage the INT_GETATTR intent above. We are counting - * on the FS driver to either make sure the attributes cached in - * the inode are always correct or refresh them in the lookup, above. 
- */ - ino = pno->p_base->pb_ino; - (void )memcpy(__stat_buf, &ino->i_stbuf, sizeof(struct intnl_stat)); - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifndef REDSTORM -int -SYSIO_INTERFACE_NAME(lstat64)(const char *filename, struct stat64 *buf) -{ - - return PREPEND(__, SYSIO_INTERFACE_NAME(lxstat64))(_STAT_VER, - filename, - buf); -} -#endif -#endif /* !_LARGEFILE64_SOURCE */ diff --git a/libsysio/src/statvfs.c b/libsysio/src/statvfs.c deleted file mode 100644 index 5f07387cd7d7907c953c3aff660100a2f3d45764..0000000000000000000000000000000000000000 --- a/libsysio/src/statvfs.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifndef BSD - -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/statvfs.h> -#include <sys/types.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" -#include "sysio-symbols.h" - -#undef statvfs -#undef fstatvfs - -#ifndef INTNL_STATVFS_IS_NATURAL -static void -convstatvfs(struct statvfs *stvfsbuf, struct intnl_statvfs *istvfsbuf) -{ - stvfsbuf->f_bsize = istvfsbuf->f_bsize; - stvfsbuf->f_frsize = istvfsbuf->f_frsize; - stvfsbuf->f_blocks = (unsigned long )istvfsbuf->f_blocks; - stvfsbuf->f_bfree = (unsigned long )istvfsbuf->f_bfree; - stvfsbuf->f_bavail = (unsigned long )istvfsbuf->f_bavail; - stvfsbuf->f_files = (unsigned long )istvfsbuf->f_files; - stvfsbuf->f_ffree = (unsigned long )istvfsbuf->f_ffree; - stvfsbuf->f_favail = (unsigned long )istvfsbuf->f_favail; - stvfsbuf->f_fsid = istvfsbuf->f_fsid; - stvfsbuf->f_flag = istvfsbuf->f_flag; - stvfsbuf->f_namemax = istvfsbuf->f_namemax; -} -#endif - -int -SYSIO_INTERFACE_NAME(statvfs)(const char *path, struct statvfs *buf) -{ - int err; - struct pnode *pno; -#ifdef INTNL_STATVFS_IS_NATURAL -#define _call_buf buf -#else - struct intnl_statvfs _call_buffer; - struct intnl_statvfs *_call_buf = &_call_buffer; -#endif - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = _sysio_namei(_sysio_cwd, path, 0, NULL, &pno); - if (err) - goto out; - - err = pno->p_base->pb_ino->i_ops.inop_statvfs(pno, NULL, _call_buf); - P_RELE(pno); - if (err) - goto err; -#ifndef INTNL_STATVFS_IS_NATURAL - convstatvfs(buf, _call_buf); -#endif 
- goto out; -err: -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __statvfs -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(statvfs), - PREPEND(__, SYSIO_INTERFACE_NAME(statvfs))) -#endif - -int -SYSIO_INTERFACE_NAME(fstatvfs)(int fd, struct statvfs *buf) -{ - int err; - struct file *filp; -#ifdef INTNL_STATVFS_IS_NATURAL -#define _call_buf buf -#else - struct intnl_statvfs _call_buffer; - struct intnl_statvfs *_call_buf = &_call_buffer; -#endif - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = 0; - filp = _sysio_fd_find(fd); - if (!filp) { - err = -EBADF; - goto out; - } - - err = filp->f_ino->i_ops.inop_statvfs(NULL, filp->f_ino, _call_buf); - if (err) - goto err; -#ifndef INTNL_STATVFS_IS_NATURAL - convstatvfs(buf, _call_buf); -#endif - goto out; -err: -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __fstatvfs -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(fstatvfs), - PREPEND(__, SYSIO_INTERFACE_NAME(fstatvfs))) -#endif - -#endif /* ifndef BSD */ diff --git a/libsysio/src/statvfs64.c b/libsysio/src/statvfs64.c deleted file mode 100644 index c89c969573e3c26c5ebb0c5aecb5191c98448357..0000000000000000000000000000000000000000 --- a/libsysio/src/statvfs64.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#ifndef BSD -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/vfs.h> -#include <sys/types.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" -#include "sysio-symbols.h" - -int -SYSIO_INTERFACE_NAME(statvfs64)(const char *path, struct statvfs64 *buf) -{ - int err; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = _sysio_namei(_sysio_cwd, path, 0, NULL, &pno); - if (err) - goto out; - - err = pno->p_base->pb_ino->i_ops.inop_statvfs(pno, NULL, buf); - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef REDSTORM -#undef __statvfs64 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(statvfs64), - PREPEND(__, SYSIO_INTERFACE_NAME(statvfs64))) -#endif - -int -SYSIO_INTERFACE_NAME(fstatvfs64)(int fd, struct statvfs64 *buf) -{ - int err; - struct file *filp; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = 0; - filp = _sysio_fd_find(fd); - if (!filp) { - err = -EBADF; - goto out; - } - - err = filp->f_ino->i_ops.inop_statvfs(NULL, filp->f_ino, buf); -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __fstatvfs64 -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(fstatvfs64), - PREPEND(__, SYSIO_INTERFACE_NAME(fstatvfs64))) -#endif - -#endif /* ifndef BSD */ diff --git a/libsysio/src/stddir.c b/libsysio/src/stddir.c deleted file mode 100644 index 41f589b02ed6119203d1832be19252d522405144..0000000000000000000000000000000000000000 --- a/libsysio/src/stddir.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifdef __linux__ -#include <features.h> -#if defined(__GLIBC__) && !defined(REDSTORM) - -/* - * stddir.c - * - * As of glibc 2.3, the new capability to define functions with a 'hidden' - * attribute means that any time glibc decides to use that capability - * we will no longer be able to successfully intercept low level calls - * in a link against default system glibc. Thus the following imported - * functions. - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <fcntl.h> -#include <dirent.h> - -#include <sysio.h> - -#include "sysio-symbols.h" -#include "stddir.h" - -/*********************************************************** - * dir series functions * - ***********************************************************/ - -DIR* -SYSIO_INTERFACE_NAME(opendir)(const char *name) -{ - DIR *dir; - - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - - dir = (DIR * )calloc(1, sizeof(DIR)); - if (!dir) - SYSIO_INTERFACE_RETURN(NULL, -ENOMEM); - - dir->fd = SYSIO_INTERFACE_NAME(open)(name, O_RDONLY); - if (dir->fd < 0) { - free(dir); - SYSIO_INTERFACE_RETURN(NULL, -errno); - } - return dir; -} - -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(opendir), - PREPEND(__, SYSIO_INTERFACE_NAME(opendir))) - -int -SYSIO_INTERFACE_NAME(closedir)(DIR *dir) -{ - int rc; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - - rc = SYSIO_INTERFACE_NAME(close)(dir->fd); - free(dir); - - SYSIO_INTERFACE_RETURN(rc, 0); -} - -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(closedir), - PREPEND(__, SYSIO_INTERFACE_NAME(closedir))) - -int -SYSIO_INTERFACE_NAME(dirfd)(DIR *dir) -{ - return(dir->fd); -} - -long int -SYSIO_INTERFACE_NAME(telldir)(DIR *dir) -{ - return(dir->filepos); -} - -void -SYSIO_INTERFACE_NAME(seekdir)(DIR *dir, long 
int offset) -{ - dir->filepos = offset; - dir->base = offset; - dir->effective = 0; - dir->cur = 0; -} - -void -SYSIO_INTERFACE_NAME(rewinddir)(DIR *dir) -{ - dir->base = 0; - dir->filepos = 0; - dir->cur = 0; - dir->effective = 0; -} - -#endif -#endif diff --git a/libsysio/src/stdlib.c b/libsysio/src/stdlib.c deleted file mode 100644 index 03bf1c7b2dd8ab510e37cd3068c662d7b5a6542c..0000000000000000000000000000000000000000 --- a/libsysio/src/stdlib.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* - * stdlib.c - * - * The only purpose of this file is help liblustre adaptive to more - * applications, and specifically for running on Linux. The ideal - * final solution would be remove this completely and only rely on - * system call interception. Unfortunately we failed to find that - * way at the moment. - * - * Initially we try the simplest implementation here, just get a confidence - * it could work. 
- * - */ -#if !(defined(BSD) || defined(REDSTORM)) - -#include <stdlib.h> -#include <errno.h> -#include <fcntl.h> -#include <dirent.h> - -#include <sysio.h> - -#include "sysio-symbols.h" - -/*********************************************************** - * FIXME workaround for linux only * - ***********************************************************/ - -#define LINUX -#if defined(LINUX) -ssize_t getxattr(char *path, char *name, void *value, size_t size) -{ - errno = ENOSYS; - return -1; -} - -ssize_t lgetxattr(char *path, char *name, void *value, size_t size) -{ - errno = ENOSYS; - return -1; -} - -ssize_t fgetxattr(int fd, char *name, void *value, size_t size) -{ - errno = ENOSYS; - return -1; -} - -long setxattr(char *path, char *name, void *value, size_t size, int flags) -{ - errno = ENOSYS; - return -1; -} - -long lsetxattr(char *path, char *name, void *value, size_t size, int flags) -{ - errno = ENOSYS; - return -1; -} - -long fsetxattr(int fd, char *name, void *value, size_t size, int flags) -{ - errno = ENOSYS; - return -1; -} - -long listxattr(char *path, char *list, size_t size) -{ - errno = ENOSYS; - return -1; -} - -long llistxattr(char *path, char *list, size_t size) -{ - errno = ENOSYS; - return -1; -} - -long flistxattr(int fd, char *list, size_t size) -{ - errno = ENOSYS; - return -1; -} - -long removexattr(char *path, char *name) -{ - errno = ENOSYS; - return -1; -} - -long lremovexattr(char *path, char *name) -{ - errno = ENOSYS; - return -1; -} - -long fremovexattr(int fd, char *name) -{ - errno = ENOSYS; - return -1; -} -#endif - -#endif diff --git a/libsysio/src/symlink.c b/libsysio/src/symlink.c deleted file mode 100644 index 2a31e372862203bb84efacbb38de84a9326fac9b..0000000000000000000000000000000000000000 --- a/libsysio/src/symlink.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "fs.h" -#include "mount.h" -#include "sysio-symbols.h" - -int -SYSIO_INTERFACE_NAME(symlink)(const char *oldpath, const char *newpath) -{ - int err; - struct intent intent; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - INTENT_INIT(&intent, INT_CREAT, NULL, NULL); - err = - _sysio_namei(_sysio_cwd, - newpath, - ND_NOFOLLOW|ND_NEGOK, - &intent, - &pno); - if (err) - goto out; - if (pno->p_base->pb_ino) { - err = -EEXIST; - goto error; - } - err = _sysio_permitted(pno->p_parent, W_OK); - if (err) - goto error; - - /* - * Use the parent node operations to request the task in case the - * driver is implemented using differentiated inode operations based - * on file type, such as incore does. - */ - err = - (*pno->p_parent->p_base->pb_ino->i_ops.inop_symlink)(pno, oldpath); -error: - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef REDSTORM -#undef __symlink -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(symlink), - PREPEND(__, SYSIO_INTERFACE_NAME(symlink))) -#endif diff --git a/libsysio/src/truncate.c b/libsysio/src/truncate.c deleted file mode 100644 index 27ef6b663170cb353c2e3d547e6fb1167380e7c3..0000000000000000000000000000000000000000 --- a/libsysio/src/truncate.c +++ /dev/null @@ -1,164 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. 
- * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <string.h> -#include <fcntl.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" -#include "fs.h" -#include "mount.h" - -#include "sysio-symbols.h" - -/* - * Truncate file, given path (alias) or index node. - */ -static int -do_truncate(struct pnode *pno, struct inode *ino, _SYSIO_OFF_T length) -{ - struct intnl_stat stbuf; - unsigned mask; - - if (length < 0) - return -EINVAL; - - if (!ino && pno->p_base->pb_ino) - ino = pno->p_base->pb_ino; - if (!ino) - return -EBADF; - if (S_ISDIR(ino->i_stbuf.st_mode)) /* for others too? 
*/ - return -EISDIR; - if (!S_ISREG(ino->i_stbuf.st_mode)) - return -EINVAL; - - (void )memset(&stbuf, 0, sizeof(stbuf)); - stbuf.st_size = length; - mask = SETATTR_LEN; - return _sysio_setattr(pno, ino, mask, &stbuf); -} - -static int -PREPEND(_, SYSIO_INTERFACE_NAME(truncate))(const char *path, - _SYSIO_OFF_T length) -{ - int err; - struct pnode *pno; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = _sysio_namei(_sysio_cwd, path, 0, NULL, &pno); - if (err) - goto out; - err = do_truncate(pno, pno->p_base->pb_ino, length); - P_RELE(pno); - -out: - SYSIO_INTERFACE_RETURN(err ? -1 : 0, err); -} - -#ifdef _LARGEFILE64_SOURCE -#undef truncate64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(truncate)), - SYSIO_INTERFACE_NAME(truncate64)) - -#undef truncate -int -SYSIO_INTERFACE_NAME(truncate)(const char *path, off_t length) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(truncate))(path, length); -} -#else -#undef truncate -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(truncate)), - SYSIO_INTERFACE_NAME(truncate)) -#endif - -static int -PREPEND(_, SYSIO_INTERFACE_NAME(ftruncate))(int fd, _SYSIO_OFF_T length) -{ - int err; - struct file *fil; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = 0; - fil = _sysio_fd_find(fd); - if (!fil) { - err = -EBADF; - goto out; - } - if (!F_CHKRW(fil, 'w')) { - err = -EBADF; - goto out; - } - err = do_truncate(NULL, fil->f_ino, length); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef _LARGEFILE64_SOURCE -#undef ftruncate64 -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(ftruncate)), - SYSIO_INTERFACE_NAME(ftruncate64)) - -#undef ftruncate -int -SYSIO_INTERFACE_NAME(ftruncate)(int fd, off_t length) -{ - - return PREPEND(_, SYSIO_INTERFACE_NAME(ftruncate))(fd, length); -} -#else -#undef ftruncate -sysio_sym_weak_alias(PREPEND(_, SYSIO_INTERFACE_NAME(ftruncate)), - SYSIO_INTERFACE_NAME(ftruncate)) -#endif diff --git a/libsysio/src/unlink.c b/libsysio/src/unlink.c deleted file mode 100644 index 8732efa4cf08c0b22df123516ffe1bbf035ee785..0000000000000000000000000000000000000000 --- a/libsysio/src/unlink.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <unistd.h> -#include <errno.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/queue.h> - -#include "sysio.h" -#include "inode.h" -#include "fs.h" -#include "mount.h" -#include "sysio-symbols.h" - -int -SYSIO_INTERFACE_NAME(unlink)(const char *path) -{ - struct intent intent; - int err; - struct pnode *pno; - struct inode *ino; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - INTENT_INIT(&intent, INT_UPDPARENT, NULL, NULL); - err = _sysio_namei(_sysio_cwd, path, ND_NOFOLLOW, &intent, &pno); - if (err) - goto out; - - err = _sysio_permitted(pno->p_parent, W_OK); - if (err) - goto error; - - ino = pno->p_base->pb_ino; - /* - * Use the parent node operations to request the task in case the - * driver is implemented using differentiated inode operations based - * on file type, such as incore does. - */ - err = (*pno->p_parent->p_base->pb_ino->i_ops.inop_unlink)(pno); - if (err) - goto error; - assert(pno->p_base->pb_ino); - /* - * Invalidate the path node. - */ - ino = pno->p_base->pb_ino; - pno->p_base->pb_ino = NULL; - /* - * Kill the i-node. I've thought and thought about this. We - * can't allow it to be found via namei any longer because we - * can't count on generation numbers support and have no - * clue why there might be other soft-references -- Could - * be an open file. - */ - I_GONE(ino); - -error: - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} - -#ifdef REDSTORM -#undef __unlink -sysio_sym_weak_alias(SYSIO_INTERFACE_NAME(unlink), - PREPEND(__, SYSIO_INTERFACE_NAME(unlink))) -#endif diff --git a/libsysio/src/utime.c b/libsysio/src/utime.c deleted file mode 100644 index d4f15cc369a6c4d60b4a2d1cb9152ca23371481f..0000000000000000000000000000000000000000 --- a/libsysio/src/utime.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <time.h> -#include <assert.h> -#include <sys/types.h> -#include <utime.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/queue.h> -#include <sys/time.h> - -#include "sysio.h" -#include "inode.h" -#include "file.h" - -time_t -_sysio_local_time() -{ - struct timeval tv; - - if (gettimeofday(&tv, NULL) != 0) - abort(); - return tv.tv_sec; -} - -int -SYSIO_INTERFACE_NAME(utime)(const char *path, const struct utimbuf *buf) -{ - int err; - struct pnode *pno; - struct utimbuf _utbuffer; - struct intnl_stat stbuf; - SYSIO_INTERFACE_DISPLAY_BLOCK; - - SYSIO_INTERFACE_ENTER; - err = _sysio_namei(_sysio_cwd, path, 0, NULL, &pno); - if (err) - goto out; - if (!buf) { - _utbuffer.actime = _utbuffer.modtime = _SYSIO_LOCAL_TIME(); - buf = &_utbuffer; - } - (void )memset(&stbuf, 0, sizeof(struct intnl_stat)); - stbuf.st_atime = buf->actime; - stbuf.st_mtime = buf->modtime; - err = - _sysio_setattr(pno, - pno->p_base->pb_ino, - SETATTR_ATIME | SETATTR_MTIME, - &stbuf); - P_RELE(pno); -out: - SYSIO_INTERFACE_RETURN(err ? 
-1 : 0, err); -} diff --git a/libsysio/tests/.cvsignore b/libsysio/tests/.cvsignore deleted file mode 100644 index e9955884756af11fe171e89bf99e459ac44f1a2a..0000000000000000000000000000000000000000 --- a/libsysio/tests/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/libsysio/tests/Makefile.am b/libsysio/tests/Makefile.am deleted file mode 100644 index 2901e5a52c380d2c15bc26cd3c15f8cd24b85cc9..0000000000000000000000000000000000000000 --- a/libsysio/tests/Makefile.am +++ /dev/null @@ -1,114 +0,0 @@ -noinst_PROGRAMS = test_copy test_stats test_path test_list \ - test_getcwd test_link test_unlink test_symlink test_rename \ - test_regions test_stddir test_fcntl_lock test_mknod test_mkdir \ - test_chown - -CLEANFILES=drv_data.c - -if WITH_NATIVE_DRIVER -NATIVE_DRIVER_NAME=native -NATIVE_DRIVER_CFLAGS= -I$(top_srcdir)/drivers/native -else -NATIVE_DRIVER_NAME= -NATIVE_DRIVER_CFLAGS= -endif - -if WITH_INCORE_DRIVER -INCORE_DRIVER_NAME=incore -INCORE_DRIVER_CFLAGS= -I$(top_srcdir)/drivers/incore -else -INCORE_DRIVER_NAME= -INCORE_DRIVER_CFLAGS= -endif - -if WITH_CPLANT_YOD -YOD_DRIVER_NAME=yod -YOD_DRIVER_CFLAGS= -DCPLANT_YOD -else -YOD_DRIVER_NAME= -YOD_DRIVER_CFLAGS= -endif - -DRIVERS=$(NATIVE_DRIVER_NAME) $(INCORE_DRIVER_NAME) $(YOD_DRIVER_NAME) \ - $(STFD_DEV_NAME) - -CMNSRC=startup.c drv_init_all.c drv_data.c - -BUILT_SOURCES=drv_data.c - -CFL=$(AM_CFLAGS) $(AM_CPPFLAGS) \ - $(NATIVE_DRIVER_CFLAGS) $(INCORE_DRIVER_CFLAGS) \ - $(STDFD_DEV_CFLAGS) $(YOD_DRIVER_CFLAGS) - -LIBS=$(LIBBUILD_DIR)/libsysio.a - -test_copy_SOURCES=test_copy.c $(CMNSRC) -test_copy_CFLAGS=$(CFL) -test_copy_DEPENDENCIES=$(LIBS) - -test_stats_SOURCES=test_stats.c $(CMNSRC) -test_stats_CFLAGS=$(CFL) -test_stats_DEPENDENCIES=$(LIBS) - -test_path_SOURCES=test_path.c $(CMNSRC) -test_path_CFLAGS=$(CFL) -test_path_DEPENDENCIES=$(LIBS) - -test_list_SOURCES=test_list.c $(CMNSRC) -test_list_CFLAGS=$(CFL) -test_list_DEPENDENCIES=$(LIBS) - -test_getcwd_SOURCES=test_getcwd.c 
$(CMNSRC) -test_getcwd_CFLAGS=$(CFL) -test_getcwd_DEPENDENCIES=$(LIBS) - -test_link_SOURCES=test_link.c $(CMNSRC) -test_link_CFLAGS=$(CFL) -test_link_DEPENDENCIES=$(LIBS) - -test_unlink_SOURCES=test_unlink.c $(CMNSRC) -test_unlink_CFLAGS=$(CFL) -test_unlink_DEPENDENCIES=$(LIBS) - -test_symlink_SOURCES=test_symlink.c $(CMNSRC) -test_symlink_CFLAGS=$(CFL) -test_symlink_DEPENDENCIES=$(LIBS) - -test_rename_SOURCES=test_rename.c $(CMNSRC) -test_rename_CFLAGS=$(CFL) -test_rename_DEPENDENCIES=$(LIBS) - -test_regions_SOURCES=test_regions.c $(CMNSRC) -test_regions_CFLAGS=$(CFL) -test_regions_DEPENDENCIES=$(LIBS) - -test_stddir_SOURCES=test_stddir.c $(CMNSRC) -test_stddir_CFLAGS=$(CFL) -test_stddir_DEPENDENCIES=$(LIBS) - -test_fcntl_lock_SOURCES=test_fcntl_lock.c $(CMNSRC) -test_fcntl_lock_CFLAGS=$(CFL) -test_fcntl_lock_DEPENDENCIES=$(LIBS) - -test_mknod_SOURCES=test_mknod.c $(CMNSRC) -test_mknod_CFLAGS=$(CFL) -test_mknod_DEPENDENCIES=$(LIBS) - -test_mkdir_SOURCES=test_mkdir.c $(CMNSRC) -test_mkdir_CFLAGS=$(CFL) -test_mkdir_DEPENDENCIES=$(LIBS) - -test_chown_SOURCES=test_chown.c $(CMNSRC) -test_chown_CFLAGS=$(CFL) -test_chown_DEPENDENCIES=$(LIBS) - -drv_data.c: $(CONFIG_DEPENDENCIES) $(top_srcdir)/tests/gendrvdata.sh - test -z "drv_data.c" && rm -f drv_data.c; \ - $(SHELL) $(top_srcdir)/tests/gendrvdata.sh $(DRIVERS) > drv_data.c - -lib_LIBRARIES=libruntime.a - -libruntime_a_SOURCES=sysio-run-start.c startup.c drv_init_all.c drv_data.c - -AM_CFLAGS = -L$(LIBBUILD_DIR) -include $(top_srcdir)/Rules.make diff --git a/libsysio/tests/README b/libsysio/tests/README deleted file mode 100644 index a8cb7a831f61ba4f8d3255f299818d6339054bbd..0000000000000000000000000000000000000000 --- a/libsysio/tests/README +++ /dev/null @@ -1,185 +0,0 @@ -To run the tests, just do a "make check" in the tests subdirectory. -On the CPlant alpha systems, 3 of the 7 tests in test_all.pl are excluded -due to known problems (problems as of the date of writing this; they -may have since been fixed). 
You can also manually run the individual -tests or ./test_all.pl. If you are running on CPlant, you need to -run test_all.pl with a -alpha argument. Either "make check" or -test_all.pl will run the 7 basic functionality tests (explained -below) and report the total number of passes and failures. -number of passes and failures. - ------------------------SCRIPTS--------------------------------- - -There are a total of 8 scripts: test_copy.pl, test_list.pl, -test_getcwd.pl, test_stats.pl, test_stdfd.pl, test_path.pl, -populator.pl, and verifier.pl. All but the last two scripts -are ran with the test_all.pl script. Here is an explanation -of the scripts. All scripts take an optional "-alpha" arg -for running the scripts in an alpha/cplant environment. The -alpha arg makes certain assumptions about the running of the -environment; for example, it does not initilization and it -starts off the test driver with yod. - -test_copy.pl <src> <dest> : This copies a file from src to dest. - : It runs a system cmp to verify that - : the two files are equivalent - -test_list.pl [-p] <dir> : This comes in two forms. -test_list.pl [-p] <fstype:mdir> <dir> : In the first form, it will - : parse through the getdirentries - : result in order to generate a - : a listing. If the -p option is - : given, it will print out the - : listing. In the second form, it - : mounts mdir into dir and then does - : the listing - -test_getcwd.pl <dir> : Tests getcwd by verifying that setting the current - : working directory to dir and then calling getcwd - : returns dir - -test_stats.pl <file> : Verifies that the set of stat calls (stat, fstat, - : fstatvfs, statvfs) return the same set of stats for file - : and that the calls return the same items as Perl's stat - : call (which would use a native library and not libsysio) - -test_stdfd.pl <file> : Verified that stdin, stdout, and stderr can be opened and - : either written to or read from - -test_path.pl <path1> <path2> ... 
: Print each path listed and its type. - : If no paths are given, paths are read - : from stdin until a "quit" is given - -populator.pl [-seed seed] : Create a file and populate with random numbers. - [-file filename] : Will use the given seed for the random number - [-bytes bytes] : generator if it is given, otherwise it uses the - : the current time as a seed. The seed used is - : returned. If no filename is given, the file - : will be named randfile.seed.procnum, where seed - : is the seed used and procnum is the process number - : of the script. If no bytes are given, 1024 bytes - : are written. All write commands use libsysio - - -verifier.pl <-seed seed> <-file fname> : Verifies that all bytes in the file fname - : (which was created with populator) match the - : random numbers which would have been used with - : the populator, using the given seed. - - - ------------------------------TEST DRIVER--------------------------------- - - -There are 6 basic commands for the test driver, CALL, PRINT, -ALLOC, FREE, HELP, and exit (EXIT, quit, or QUIT will also work). - -CALL is the main command for running libsysio calls. The format -will depend on the particular libsysio command being ran. -Basically, the format is CALL cmd args. The available commands -used with CALL are (in no particular order): - -fstat iwrite read chdir -fstatvfs iwritev readv chmod -fsync list rmdir chown -ftruncate lseek sizeof close -getcwd lstat stat cmpstr -getdirentries mkdir statvfs creat -init mknod symlink debug -ioctl mount truncate dup -iodone open umask dup2 -iowait umount endian ipread -printline unlink ipreadv pread -write fcntl ipwrite preadv -writev fdatasync ipwritev pwritev -fill iread pwrite ireadv - -The specifics of the commands are explained later. - -The return value from a command can be saved and referenced later -by using a syntax similar to $foo = x. 
Commands can be combined, such -as: - -CALL fstat ( $fd = CALL open foo ) ( $buf = ALLOC 128 ), - -with some cautionary notes. First, everything needs to be -seperated by a space. File names with spaces in them need to be quoted, -as in: - -$fd = CALL open "file with spaces" O_RDONLY - -Second, any value that is used needs to be identified with an identifier. -In other words, the command: - -$buf = ALLOC ( CALL sizeof stat ) - -will not work, but the command - -$buf = ALLOC ( $size = CALL sizeof stat ) - -will. - - -All commands return a 4 digit status code. The codes are: - -0000 : Success. This does NOT necessarily mean that the libsysio - : command returned success, only that there were no errors - : in issuing the command to libsysio. To get the result of - : the libsysio command, use PRINT $$ . PRINT $errno will return - : the last error code. -0x001 : Invalid arguments given to command -0x002 : Invalid command issued -0x004 : Invalid variable identifier given - - -ALLOC takes a size argument and an optional alignment argument. -FREE takes the variable to free as an argument. -HELP without any arguments displays the list of commands. -HELP <cmd> will give information on the specific command - -PRINT take several forms. To just print out a variable, type -PRINT $var-name. If the variable is an integer, it will return -the integer. If it is a string, it will print out the string. -If it is a buffer, it will print out the buffer as a series of -hex digits. Note for most buffers, the test driver will not -know what it contains--just because it should contain a string -does not mean that the driver will know that. - -The other form of PRINT is: - -PRINT $var_name <offset> <length> <type> - -which will print out length units of the given type starting at -the given offset. The length is the total length in bytes, so -for an integer, a length of 4 would only print out one integer. -The length argument is ignored for strings. 
Allowable types are -INT SHORT CHAR and LONG. - -For most of the CALL commands, their format is similar to the -related sysio call. The ones that do not have a corresponding -sysio call are listed below: - -init: This MUST be called prior to any sysio calls. It initilizes - : libsysio - -printline: If debugging is turned on, this will print a line number - : with any debug lines - -fill <val> <type> <size> <offset> <buf>: Fills buffer buf with size - : bytes of val starting at - : buf+offset. The type of val - : can be UINT. STR, or PTR and - : is given by the type arg - -list <dir>: Lists contents of dir. If no dir is given, uses cwd - -debug <num>: Sets debug level to num - -sizeof <obj>: Gives the size of the obj. Valid objs are char, int, - : long, flock, stat, and statvfs - -endian: returns 0 if the machine is little endian, one otherwise - -cmpstr <buf1> <buf2>: Issues a strcmp call on the two buffers to - : see if they are the same. Returns 0 for a - : match diff --git a/libsysio/tests/drv_init_all.c b/libsysio/tests/drv_init_all.c deleted file mode 100644 index 6a3ad2b24cdb5b2272e8b5679bd5cec0c530a741..0000000000000000000000000000000000000000 --- a/libsysio/tests/drv_init_all.c +++ /dev/null @@ -1,23 +0,0 @@ -#include <stdio.h> - -extern int (*drvinits[])(void); - -/* - * Init all the drivers we know about. - */ -int -drv_init_all() -{ - int (**f)(void); - int err; - - err = 0; - f = drvinits; - while (*f) { - err = (**f++)(); - if (err) - return err; - } - - return 0; -} diff --git a/libsysio/tests/gendrvdata.sh b/libsysio/tests/gendrvdata.sh deleted file mode 100644 index 8b84d82ee673b97a1ad904f1be63e0ad97d66aed..0000000000000000000000000000000000000000 --- a/libsysio/tests/gendrvdata.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/sh - -# This Cplant(TM) source code is the property of Sandia National -# Laboratories. -# -# This Cplant(TM) source code is copyrighted by Sandia National -# Laboratories. 
-# -# The redistribution of this Cplant(TM) source code is subject to the -# terms of the GNU Lesser General Public License -# (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) -# -# Cplant(TM) Copyright 1998-2003 Sandia Corporation. -# Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive -# license for use of this work by or on behalf of the US Government. -# Export of this program may require a license from the United States -# Government. - -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -# Questions or comments about this library should be sent to: -# -# Lee Ward -# Sandia National Laboratories, New Mexico -# P.O. Box 5800 -# Albuquerque, NM 87185-1110 -# -# lee@sandia.gov - -echo '/*' -echo ' * This file automatically generated by gendrvdata.sh. All changes' -echo ' * will be lost!' 
-echo ' */' -echo -echo '#include <stdlib.h>' -echo -echo '#include "test.h"' -echo -for i in $@; do - echo "extern int _sysio_${i}_init(void);" -done -echo -echo 'int (*drvinits[])(void) = {' - -for i in $@; do - echo " _sysio_${i}_init," -done -echo " NULL" -echo "};" diff --git a/libsysio/tests/module.mk b/libsysio/tests/module.mk deleted file mode 100644 index 3ed43b0fe2baddeb75c798f4fb1c233211c4557e..0000000000000000000000000000000000000000 --- a/libsysio/tests/module.mk +++ /dev/null @@ -1,2 +0,0 @@ -TESTS_EXTRA = $(shell ls tests/*.[ch]) \ - tests/Makefile.am tests/Makefile.in tests/module.mk diff --git a/libsysio/tests/startup.c b/libsysio/tests/startup.c deleted file mode 100644 index 033b325ce9ab328727a16ddf8d22cb1e32b7e4b1..0000000000000000000000000000000000000000 --- a/libsysio/tests/startup.c +++ /dev/null @@ -1,65 +0,0 @@ -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/queue.h> - -#include "test.h" - -#include "sysio.h" -#include "xtio.h" - -int -_test_sysio_startup() -{ - int err; - char *arg; - - err = _sysio_init(); - if (err) - return err; - err = drv_init_all(); - if (err) - return err; -#ifdef SYSIO_TRACING - /* - * tracing - */ - arg = getenv("SYSIO_TRACING"); - err = _sysio_boot("trace", arg); - if (err) - return err; -#endif - /* - * namespace - */ - arg = getenv("SYSIO_NAMESPACE"); - if (!(arg || (arg = getenv("SYSIO_MANUAL")))) { - /* - * Assume a native mount at root with automounts enabled. - */ - arg = "{mnt,dev=\"native:/\",dir=/,fl=2}"; - } - err = _sysio_boot("namespace", arg); - if (err) - return err; -#ifdef DEFER_INIT_CWD - /* - * Current working directory. 
- */ - arg = getenv("SYSIO_CWD"); - if (!arg) - arg = "/"; - err = _sysio_boot("cwd", arg); - if (err) - return err; -#endif - return 0; -} - -void -_test_sysio_shutdown() -{ - - _sysio_shutdown(); -} diff --git a/libsysio/tests/sysio-run-start.c b/libsysio/tests/sysio-run-start.c deleted file mode 100644 index d09fba41d5f09cef303ebb400680358c7ff6ddd5..0000000000000000000000000000000000000000 --- a/libsysio/tests/sysio-run-start.c +++ /dev/null @@ -1,26 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/uio.h> -#include <xtio.h> -#include "test.h" - -void _sysio_startup(void) __attribute__ ((constructor)); - -void -_sysio_startup() -{ - int err; - - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - abort(); - } - if (atexit(_test_sysio_shutdown) != 0) { - perror("atexit"); - abort(); - } -} diff --git a/libsysio/tests/test.h b/libsysio/tests/test.h deleted file mode 100644 index 781d59b88b2af4424896b0eec428cba96cb03319..0000000000000000000000000000000000000000 --- a/libsysio/tests/test.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -extern int (*drvinits[])(void); - -extern int drv_init_all(void); -extern int _test_sysio_startup(void); -extern void _test_sysio_shutdown(void); diff --git a/libsysio/tests/test_chown.c b/libsysio/tests/test_chown.c deleted file mode 100644 index 251f2995a1417c08a4e362bf1515a0afa5e02082..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_chown.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2007 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. 
- * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "test.h" - -/* - * Test chown call - * - * Usage: chown <path> <uid> <gid> - * - */ - -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int (*chown_func)(const char *, uid_t, gid_t); - int (*stat_func)(const char *, struct stat *); - int i; - int err; - int n; - char *path; - uid_t uid; - gid_t gid; - struct stat stbuf; - extern int _test_sysio_startup(void); - - chown_func = SYSIO_INTERFACE_NAME(chown); - stat_func = SYSIO_INTERFACE_NAME(stat); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. 
- */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - n = argc - optind; - if (n < 3) usage(); - - path = argv[optind++]; - uid = atoi(argv[optind++]); - gid = atoi(argv[optind++]); - - do { - err = (*chown_func)(path, uid, gid); - if (err != 0) { - perror(path); - break; - } - err = (*stat_func)(path, &stbuf); - if (err != 0) { - perror(path); - break; - } - (void )printf("uid now %ld, gid now %ld\n", - (long )stbuf.st_uid, (long )stbuf.st_gid); - } while (0); - - /* - * Clean up. - */ - _test_sysio_shutdown(); - - return err ? -1 : 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: chown" - " <path> <uid> <gid>\n"); - - exit(1); -} diff --git a/libsysio/tests/test_copy.c b/libsysio/tests/test_copy.c deleted file mode 100644 index e1954ffc46e478976469ee88cc29f2ccbacd5008..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_copy.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/uio.h> -#include <sys/queue.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Copy one file to another. - * - * Usage: test_copy [-o] <src> <dest> - * - * Destination will not be overwritten if it already exist. - */ - -static int overwrite = 0; /* over-write? */ - -void usage(void); -int copy_file(const char *spath, const char *dpath); - -int -main(int argc, char * const argv[]) -{ - int i; - int err; - const char *spath, *dpath; - - /* - * Parse command-line args. 
- */ - while ((i = getopt(argc, - argv, - "o" - )) != -1) - switch (i) { - - case 'o': - overwrite = 1; - break; - default: - usage(); - } - - if (!(argc - optind)) - usage(); - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - /* - * Source - */ - spath = argv[optind++]; - if (!(argc - optind)) - usage(); - /* - * Destination - */ - dpath = argv[optind++]; - if (argc - optind) - usage(); - - err = copy_file(spath, dpath); - - _test_sysio_shutdown(); - - return err; -} - -void -usage() -{ - - (void )fprintf(stderr, - "Usage: test_copy " - " source destination\n"); - exit(1); -} - -int -open_file(const char *path, int flags, mode_t mode) -{ - int fd; - - fd = SYSIO_INTERFACE_NAME(open)(path, flags, mode); - if (fd < 0) - perror(path); - - return fd; -} - -int -copy_file(const char *spath, const char *dpath) -{ - int sfd, dfd; - int flags; - int rtn; - struct stat stat; - char *buf; - size_t bufsiz; - ssize_t cc, wcc; - - sfd = dfd = -1; - rtn = -1; - buf = NULL; - - sfd = open_file(spath, O_RDONLY, 0); - if (sfd < 0) - goto out; - flags = O_CREAT|O_WRONLY; - if (!overwrite) - flags |= O_EXCL; - dfd = open_file(dpath, flags, 0666); - if (dfd < 0) - goto out; - - rtn = SYSIO_INTERFACE_NAME(fstat)(dfd, &stat); - if (rtn != 0) { - perror(dpath); - goto out; - } - bufsiz = stat.st_blksize; - if (bufsiz < (64 * 1024)) - bufsiz = - (((64 * 1024) / stat.st_blksize - 1) + 1) * (64 * 1024); - buf = malloc(bufsiz); - if (!buf) { - perror(dpath); - goto out; - } - - while ((cc = SYSIO_INTERFACE_NAME(read)(sfd, buf, bufsiz)) > 0) - if ((wcc = SYSIO_INTERFACE_NAME(write)(dfd, buf, cc)) != cc) { - if (wcc < 0) { - perror(dpath); - break; - } - (void )fprintf(stderr, - "%s: short write (%u/%u)\n", - dpath, - (unsigned )wcc, - (unsigned )cc); - break; - } - if (cc < 0) { - perror(spath); - rtn = -1; - } - -out: - if (buf) - free(buf); - if (sfd >= 0 && SYSIO_INTERFACE_NAME(close)(sfd) != 0) - perror(spath); - if (dfd >= 0 && - 
(SYSIO_INTERFACE_NAME(fsync)(dfd) != 0 || - SYSIO_INTERFACE_NAME(close)(dfd) != 0)) - perror(dpath); - - return rtn; -} diff --git a/libsysio/tests/test_fcntl_lock.c b/libsysio/tests/test_fcntl_lock.c deleted file mode 100644 index c5e79b50d89ee7711ffa4dad525bf8b46dddb620..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_fcntl_lock.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2005 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * fcntl lock tests - * - * Usage: test_fcnt_lock [<path> ...] - */ - -void usage(void); -void do_tests(const char *path); - -int -main(int argc, char * const argv[]) -{ - int i; - int err; - extern int _test_sysio_startup(void); - - /* - * Parse command-line args. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - while (optind < argc) - do_tests(argv[optind++]); - - /* - * Clean up. - */ - _test_sysio_shutdown(); - - return 0; -} - -void -usage() -{ - - (void )fprintf(stderr, - "Usage: test_fcntl_lock" - " source...\n"); - exit(1); -} - -void -do_tests(const char *path) -{ - int fd; - int err; - struct flock flock; - - fd = SYSIO_INTERFACE_NAME(open)(path, O_RDONLY); - if (fd < 0) { - perror(path); - return; - } - do { - flock.l_type = F_RDLCK; - flock.l_whence = SEEK_CUR; - flock.l_start = 0; - flock.l_len = 0; - flock.l_pid = 0; - err = SYSIO_INTERFACE_NAME(fcntl)(fd, F_SETLK, &flock); - if (err) - break; - flock.l_type = F_UNLCK; - err = SYSIO_INTERFACE_NAME(fcntl)(fd, F_SETLK, &flock); - if (err) - break; - } while (0); - - if (err) - perror(path); - if (SYSIO_INTERFACE_NAME(close)(fd) != 0) - perror(path); -} diff --git a/libsysio/tests/test_getcwd.c b/libsysio/tests/test_getcwd.c deleted file mode 100644 index fb56c240e59e8f3e703f0c88ea921bf4fab637aa..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_getcwd.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. 
- * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/uio.h> -#include <sys/queue.h> -#include <dirent.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "mount.h" - -#include "test.h" - -/* - * Test getcwd() - * - * Usage: test_cwd [<working-dir>...] 
- * - * Without any path arguments, the program reads from standard-in, dealing with - * each line as an absolute or relative path until EOF. - */ - -static int doit(const char *path); -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int i; - int err; - int n; - extern int _test_sysio_startup(void); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - /* - * Init sysio lib. - */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - n = argc - optind; - - /* - * Try path(s) listed on command-line. - */ - while (optind < argc) { - const char *path; - - path = argv[optind++]; - (void )doit(path); - } - - /* - * If no command-line arguments, read from stdin until EOF. - */ - if (!n) { - int doflush; - static char buf[4096]; - size_t len; - char *cp; - char c; - - doflush = 0; - while (fgets(buf, sizeof(buf), stdin) != NULL) { - len = strlen(buf); - cp = buf + len - 1; - c = *cp; - *cp = '\0'; - if (!doflush) - doit(buf); - doflush = c == '\n' ? 0 : 1; - } - } - - /* - * Clean up. - */ - _test_sysio_shutdown(); - - return 0; -} - -static int -doit(const char *path) -{ - char *buf; - - if (SYSIO_INTERFACE_NAME(chdir)(path) != 0) { - perror(path); - return -1; - } - buf = SYSIO_INTERFACE_NAME(getcwd)(NULL, 0); - if (!buf) { - perror(path); - return -1; - } - (void )printf("%s\n", buf); - free(buf); - return 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: test_getcwd " - " [<path> ...\n]"); - - exit(1); -} diff --git a/libsysio/tests/test_link.c b/libsysio/tests/test_link.c deleted file mode 100644 index 317bc5ea0c4a906dad8141324c2ac77f12956420..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_link.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. 
- * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#if 0 -#include <dirent.h> -#endif -#include <sys/uio.h> -#include <sys/queue.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Test hard link - * - * Usage: link oldpath newpath - * - */ - -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int i; - int err; - int n; - extern int _test_sysio_startup(void); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. - */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - n = argc - optind; - if (n < 2) usage(); - - /* - * Try paths listed on command-line. - */ - while (optind < argc) { - const char *old, *new; - struct stat stbuf; - - old = argv[optind++]; - new = argv[optind++]; - if ((err = SYSIO_INTERFACE_NAME(link)(old, new)) != 0) { - perror("link"); - break; - } - if ((err = SYSIO_INTERFACE_NAME(lstat)(new, &stbuf)) != 0) { - perror(new); - break; - } - } - - /* - * Clean up. - */ - _test_sysio_shutdown(); - - return err ? -1 : 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: unlink" - " oldpath newpath\n"); - - exit(1); -} diff --git a/libsysio/tests/test_list.c b/libsysio/tests/test_list.c deleted file mode 100644 index b9e4c0de459636b21996bd601f9b19e891b49098..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_list.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. 
- * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <dirent.h> -#include <sys/uio.h> -#include <sys/queue.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Stat files. - * - * Usage: test_list [path...] 
- * - * Without any path arguments, the program reads from standard-in, dealing with - * each line as an absolute or relative path until EOF. - */ - -static int listit(const char *path); -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int i; - int err; - int n; - extern int _test_sysio_startup(void); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. - */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - n = argc - optind; - - /* - * Try path(s) listed on command-line. - */ - while (optind < argc) { - const char *path; - - path = argv[optind++]; - (void )listit(path); - } - - /* - * If no command-line arguments, read from stdin until EOF. - */ - if (!n) { - int doflush; - static char buf[4096]; - size_t len; - char *cp; - char c; - - doflush = 0; - while (fgets(buf, sizeof(buf), stdin) != NULL) { - len = strlen(buf); - cp = buf + len - 1; - c = *cp; - *cp = '\0'; - if (!doflush) - listit(buf); - doflush = c == '\n' ? 0 : 1; - } - } - - /* - * Clean up. 
- */ - _test_sysio_shutdown(); - - return 0; -} - -static int -listit(const char *path) -{ - int fd; - size_t n; - struct dirent *buf, *dp; - off_t base; - ssize_t cc; - - fd = SYSIO_INTERFACE_NAME(open)(path, O_RDONLY); - if (fd < 0) { - perror(path); - return -1; - } - - n = 16 * 1024; - buf = malloc(n); - if (!buf) { - perror(path); - cc = -1; - goto out; - } - - while ((cc = SYSIO_INTERFACE_NAME(getdirentries)(fd, - (char *)buf, - n, - &base)) > 0) { - dp = buf; - while (cc > 0) { - (void )printf("\t%s: ino %llu type %u\n", - dp->d_name, - (unsigned long long )dp->d_ino, - (int )dp->d_type); - cc -= dp->d_reclen; - dp = (struct dirent *)((char *)dp + dp->d_reclen); - } - } - -out: - if (cc < 0) - perror(path); - - free(buf); - { - int oerrno = errno; - - if (SYSIO_INTERFACE_NAME(close)(fd) != 0) { - perror(path); - if (cc < 0) - errno = oerrno; - else - cc = -1; - } - } - - return (int )cc; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: list_path" - " [<path> ...\n]"); - - exit(1); -} diff --git a/libsysio/tests/test_mkdir.c b/libsysio/tests/test_mkdir.c deleted file mode 100644 index e8711f80bda342415a691f2c1a6166617642ceb7..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_mkdir.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#if 0 -#include <dirent.h> -#endif -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "test.h" - -/* - * Make directories. - * - * Usage: mkdir [path...] - * - * Without any path arguments, the program creates directories named - * by the command line args. - */ - -static int do_mkdir(const char *path); -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int i; - int err; - int n; - extern int _test_sysio_startup(void); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. - */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - n = argc - optind; - - /* - * Try path(s) listed on command-line. 
- */ - while (optind < argc) { - const char *path; - - path = argv[optind++]; - (void )do_mkdir(path); - } - - /* - * If no command-line arguments, read from stdin until EOF. - */ - if (!n) { - int doflush; - static char buf[4096]; - size_t len; - char *cp; - char c; - - doflush = 0; - while (fgets(buf, sizeof(buf), stdin) != NULL) { - len = strlen(buf); - cp = buf + len - 1; - c = *cp; - *cp = '\0'; - if (!doflush) - do_mkdir(buf); - doflush = c == '\n' ? 0 : 1; - } - } - - /* - * Clean up. - */ - _test_sysio_shutdown(); - - return 0; -} - -static int -do_mkdir(const char *path) -{ - - if (SYSIO_INTERFACE_NAME(mkdir)(path, 777) != 0) { - perror(path); - return -1; - } - - return 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: mkdir" - " [<path> ...\n]"); - - exit(1); -} diff --git a/libsysio/tests/test_mknod.c b/libsysio/tests/test_mknod.c deleted file mode 100644 index 7f1d9378e62a53739b4ab9f01ccc2258a29bee04..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_mknod.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -/* - * Can't provoke a definition of the S_IFMT macros without a little extra work. - */ -#define _BSD_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "test.h" - -/* - * Create a node. - * - * Usage: mknod path {f|b|c} [dev] - * - * The dev argument should not be present for regular file and FIFO object - * creation. - */ - -static int do_mknod(const char *path, mode_t mode, dev_t dev); -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int i; - int err; - mode_t mode; - dev_t dev; - extern int _test_sysio_startup(void); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. 
- */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - if (argc - optind < 2) - usage(); - if (strlen(argv[optind + 1]) != 1) - usage(); - mode = 0666; - switch (*argv[optind + 1]) { - - case 'f': - mode |= S_IFREG; - break; - case 'b': - mode |= S_IFBLK; - break; - case 'c': - mode |= S_IFCHR; - break; - case 'p': - mode |= S_IFIFO; - break; - default: - usage(); - } - dev = 0; - if (!(S_ISREG(mode) || S_ISFIFO(mode))) - dev = atoi(argv[optind + 2]); - else if (argc - optind != 2) { - (void )fprintf(stderr, "Too many arguments\n"); - usage(); - } - (void )do_mknod(argv[optind + 0], mode, dev); - - /* - * Clean up. - */ - _test_sysio_shutdown(); - - return 0; -} - -static int -do_mknod(const char *path, mode_t mode, dev_t dev) -{ - - if (SYSIO_INTERFACE_NAME(mknod)(path, mode, dev) != 0) { - perror(path); - return -1; - } - - return 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, "Usage: mknod path {f|b|c|p} dev\n"); - exit(1); -} diff --git a/libsysio/tests/test_path.c b/libsysio/tests/test_path.c deleted file mode 100644 index 8776b6ef45d0984d5cb3e0c7133eb7f8ce56906c..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_path.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. 
- */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/uio.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Stat files. - * - * Usage: test_path [path...] - * - * Without any path arguments, the program reads from standard-in, dealing with - * each line as an absolute or relative path until EOF. - */ - -static int statit(const char *path); -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int i; - int err; - int n; - extern int _test_sysio_startup(void); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. - */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - n = argc - optind; - - /* - * Try path(s) listed on command-line. 
- */ - while (optind < argc) { - const char *path; - - path = argv[optind++]; - (void )statit(path); - } - - /* - * If no command-line arguments, read from stdin until EOF. - */ - if (!n) { - int doflush; - static char buf[4096]; - size_t len; - char *cp; - char c; - - doflush = 0; - while (fgets(buf, sizeof(buf), stdin) != NULL) { - len = strlen(buf); - cp = buf + len - 1; - c = *cp; - *cp = '\0'; - if (!doflush) - statit(buf); - doflush = c == '\n' ? 0 : 1; - } - } - - /* - * Clean up. - */ - _test_sysio_shutdown(); - - return 0; -} - -static int -statit(const char *path) -{ - int err; - struct stat stbuf; - char t; - static char buf[4096]; - ssize_t cc; - - /* - * Get file attrs. - */ - err = SYSIO_INTERFACE_NAME(lstat)(path, &stbuf); - if (err) { - perror(path); - return -1; - } - - /* - * Get readable representation of file type. - */ - if (S_ISDIR(stbuf.st_mode)) - t = 'd'; - else if (S_ISCHR(stbuf.st_mode)) - t = 'c'; - else if (S_ISBLK(stbuf.st_mode)) - t = 'b'; - else if (S_ISREG(stbuf.st_mode)) - t = 'f'; -#ifdef S_ISFIFO - else if (S_ISFIFO(stbuf.st_mode)) - t = 'p'; -#endif -#ifdef S_ISLNK - else if (S_ISLNK(stbuf.st_mode)) - t = 'S'; -#endif -#ifdef S_ISSOCK - else if (S_ISSOCK(stbuf.st_mode)) - t = 's'; -#endif -#ifdef S_TYPEISMQ - else if (S_TYPEISMQ(&stbuf)) - t = 'q'; -#endif -#ifdef S_TYPEISSEM - else if (S_TYPEISSEM(&stbuf)) - t = 'M'; -#endif -#ifdef S_TYPEISSHM - else if (S_TYPEISSHM(&stbuf)) - t = 'm'; -#endif - else - t = '?'; - - /* - * Print path and type. 
- */ - if (S_ISLNK(stbuf.st_mode)) { - cc = SYSIO_INTERFACE_NAME(readlink)(path, buf, sizeof(buf)); - if (cc < 0) { - perror(path); - return -1; - } - } - (void )printf("%s: %c", path, t); - if (S_ISLNK(stbuf.st_mode) && (size_t )cc < sizeof(buf)) - (void )printf(" %.*s", (int )cc, buf); - (void )putchar('\n'); - - return 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: test_path" - " [<path> ...\n]"); - - exit(1); -} diff --git a/libsysio/tests/test_regions.c b/libsysio/tests/test_regions.c deleted file mode 100644 index e253a3028f4a4382b139c42a36f1c69d4df21d52..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_regions.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is regionsrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2004 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a regions of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <limits.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/uio.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Copy one file to another. - * - * Usage: test_regions [-x] \ - * {r,w} <off> <count> <path> - * - * Destination will not be overwritten if it already exist. - */ - -#if defined(_LARGEFILE64_SOURCE) && _LARGEFILE64_SOURCE -#define GO64 -#else -#warning Cannot prompt the 64-bit interface -#endif - -char which; -#ifdef GO64 -int use64 = 0; /* 64-bit interface? */ -#endif - -void usage(void); - -int -main(int argc, char * const argv[]) -{ - int i; - int err; - long l; - off_t off; -#ifdef GO64 - long long ll; - off64_t off64; -#endif - char *cp; - unsigned long nbytes; - const char *path; - char *buf; - int flags; - int fd; - ssize_t cc; - extern int _test_sysio_startup(void); - - /* - * Parse command-line args. 
- */ - while ((i = getopt(argc, - argv, -#ifdef __GLIBC__ - "+" -#endif -#ifdef GO64 - "x" -#endif - "")) != -1) - switch (i) { - -#ifdef GO64 - case 'x': - use64 = 1; - break; -#endif - default: - usage(); - } - - if (argc - optind != 4) - usage(); - - which = *argv[optind]; - if (strlen(argv[optind]) != 1 || !(which == 'r' || which == 'w')) { - (void )fprintf(stderr, "Which op?\n"); - exit(1); - } - optind++; - off = l = -#ifdef GO64 - ll = strtoll(argv[optind++], &cp, 0); -#else - strtol(argv[optind++], &cp, 0); -#endif -#ifdef GO64 - off64 = ll; -#endif - if (*cp != '\0' || -#ifdef GO64 - ((ll == LLONG_MIN || ll == LLONG_MAX) && errno == ERANGE) || - off64 != ll || (!use64 && off != ll) -#else - ((l == LONG_MIN || l == LONG_MAX) && errno == ERANGE) || - off != l -#endif - ) { - (void )fprintf(stderr, "Offset out of range\n"); - exit(1); - } - nbytes = strtoul(argv[optind++], &cp, 0); - if (*cp != '\0' || (nbytes == ULONG_MAX && errno == ERANGE)) { - (void )fprintf(stderr, "Transfer count out of range\n"); - exit(1); - } - if (!(argc - optind)) - usage(); - path = argv[optind++]; - - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - (void )umask(022); - - buf = malloc(nbytes); - if (!buf) { - perror("malloc"); - err = 1; - goto out; - } - (void )memset(buf, 0, nbytes); - - err = 0; - flags = which == 'r' ? O_RDONLY : (O_WRONLY|O_CREAT|O_EXCL); -#ifdef GO64 - if (use64) - flags |= O_LARGEFILE; -#endif - fd = SYSIO_INTERFACE_NAME(open)(path, flags, 0666); - if (fd < 0) { - perror(path); - err = 1; - goto error; - } -#ifdef GO64 - if (use64) - off64 = SYSIO_INTERFACE_NAME(lseek64)(fd, off64, SEEK_SET); - else - off64 = -#endif - off = SYSIO_INTERFACE_NAME(lseek)(fd, off, SEEK_SET); -#ifdef GO64 - if ((use64 && off64 < 0) || (!use64 && off < 0)) { - perror(use64 ? 
"lseek64" : "lseek"); - err = 1; - goto error; - } -#else - if (off < 0) { - perror("lseek"); - err = 1; - goto error; - } -#endif - if (which == 'r') - cc = SYSIO_INTERFACE_NAME(read)(fd, buf, nbytes); - else - cc = SYSIO_INTERFACE_NAME(write)(fd, buf, nbytes); - if (cc < 0) { - perror(path); - err = 1; - goto error; - } -#ifdef GO64 - if (use64) { - off64 = SYSIO_INTERFACE_NAME(lseek64)(fd, 0, SEEK_CUR); - } else - off64 = -#endif - off = SYSIO_INTERFACE_NAME(lseek)(fd, 0, SEEK_CUR); - (void )printf(("%s%s@" -#ifdef GO64 - "%lld" -#else - "%ld" -#endif - ": %ld, off " -#ifdef GO64 - "%lld" -#else - "%ld" -#endif - "\n"), - which == 'r' ? "read" : "write", -#ifdef GO64 - use64 ? "64" : "", - ll, -#else - "", - l, -#endif - (long )cc, -#ifdef GO64 - (long long int)off64 -#else - off -#endif - ); - -error: - if (fd > 0 && SYSIO_INTERFACE_NAME(close)(fd) != 0) - perror(path); - free(buf); -out: - _test_sysio_shutdown(); - - return err; -} - -void -usage() -{ - - (void )fprintf(stderr, - "Usage: test_regions " -#ifdef GO64 - "[-x] " -#endif - " {r,w} <offset> <nbytes> <path>\n"); - exit(1); -} diff --git a/libsysio/tests/test_rename.c b/libsysio/tests/test_rename.c deleted file mode 100644 index 13aa1eec2c87f0361ee903f25357de40699a2193..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_rename.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. 
- * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/uio.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Rename a file system object. - * - * Usage: test_rename <src> <dest> - */ - -void usage(void); -int rename_file(const char *spath, const char *dpath); - -int -main(int argc, char * const argv[]) -{ - int i; - int err; - const char *spath, *dpath; - extern int _test_sysio_startup(void); - - /* - * Parse command-line args. 
- */ - while ((i = getopt(argc, - argv, - "" - )) != -1) - switch (i) { - - default: - usage(); - } - - if (!(argc - optind)) - usage(); - - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - (void )SYSIO_INTERFACE_NAME(umask)(022); - - /* - * Source - */ - spath = argv[optind++]; - if (!(argc - optind)) - usage(); - /* - * Destination - */ - dpath = argv[optind++]; - if (argc - optind) - usage(); - - err = SYSIO_INTERFACE_NAME(rename)(spath, dpath); - if (err) - perror("rename"); - - _test_sysio_shutdown(); - - return err; -} - -void -usage() -{ - - (void )fprintf(stderr, - "Usage: test_rename" - " source destination\n"); - exit(1); -} diff --git a/libsysio/tests/test_stats.c b/libsysio/tests/test_stats.c deleted file mode 100644 index e604d9d5f5af0a9e443004c4f4c6757d036429e5..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_stats.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#ifdef notdef -#include <sys/statvfs.h> -#endif -#include <sys/uio.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Get stats of file and file system. - * - * Usage: test_stats [<path> ...] - */ - -void usage(void); -void do_stats(const char *path); - -int -main(int argc, char * const argv[]) -{ - int i; - int err; - extern int _test_sysio_startup(void); - - /* - * Parse command-line args. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - (void )SYSIO_INTERFACE_NAME(umask)(022); - - while (optind < argc) - do_stats(argv[optind++]); - - /* - * Clean up. 
- */ - _test_sysio_shutdown(); - - return 0; -} - -void -usage() -{ - - (void )fprintf(stderr, - "Usage: test_stats" - " source destination\n"); - exit(1); -} - -void -do_stats(const char *path) -{ - int fd; - int err; - struct stat stbuf1, stbuf2; -#ifdef notdef - struct statvfs stvfsbuf1, stvfsbuf2; -#endif - - fd = SYSIO_INTERFACE_NAME(open)(path, O_RDONLY); - if (fd < 0) { - perror(path); - return; - } - err = SYSIO_INTERFACE_NAME(fstat)(fd, &stbuf1); - if (!err) - err = SYSIO_INTERFACE_NAME(stat)(path, &stbuf2); -#ifdef notdef - if (!err) - err = SYSIO_INTERFACE_NAME(fstatvfs)(fd, &stvfsbuf1); - if (!err) - err = SYSIO_INTERFACE_NAME(statvfs)(path, &stvfsbuf1); -#endif - if (err) { - perror(path); - goto out; - } - if (stbuf1.st_dev != stbuf2.st_dev || - stbuf1.st_ino != stbuf2.st_ino) { - (void )fprintf(stderr, "%s: [f]stat info mismatch\n", path); - goto out; - } -#ifdef notdef - if (stvfsbuf1.f_fsid != stvfsbuf2.f_fsid) { - (void )fprintf(stderr, "%s: [f]statvfs info mismatch\n", path); - } -#endif - printf("%s:" - " dev %lu," - " ino %lu," - " mode %lu," - " nlink %lu," - " uid %lu," - " gid %lu," - " rdev %lu," - " size %llu," - " blksize %lu," - " blocks %lu," - " atime %lu," - " mtime %lu," - " ctime %lu" - "\n", - path, - (unsigned long )stbuf1.st_dev, - (unsigned long )stbuf1.st_ino, - (unsigned long )stbuf1.st_mode, - (unsigned long )stbuf1.st_nlink, - (unsigned long )stbuf1.st_uid, - (unsigned long )stbuf1.st_gid, - (unsigned long )stbuf1.st_rdev, - (unsigned long long)stbuf1.st_size, - (unsigned long )stbuf1.st_blksize, - (unsigned long )stbuf1.st_blocks, - (unsigned long )stbuf1.st_atime, - (unsigned long )stbuf1.st_mtime, - (unsigned long )stbuf1.st_ctime); -out: - if (SYSIO_INTERFACE_NAME(close)(fd) != 0) - perror("closing file"); -} diff --git a/libsysio/tests/test_stddir.c b/libsysio/tests/test_stddir.c deleted file mode 100644 index b122f9362aaa15f345b0b7b00fa4274b40a47cc8..0000000000000000000000000000000000000000 --- 
a/libsysio/tests/test_stddir.c +++ /dev/null @@ -1,171 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdlib.h> -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <getopt.h> -#include <dirent.h> -#include <sys/types.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Test {open, read, close}dir functions - * - * Usage: test_stddir [path, ...] - */ -static int testit(const char *); -static void usage(void); - -int -main (int argc, char** argv) -{ - int err; - int i; - int n; - const char *path; - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. - */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - /* - * If no command-line arguments, read from stdin until EOF. - */ - n = argc - optind; - if (!n) { - int doflush; - static char buf[4096]; - size_t len; - char *cp; - char c; - - doflush = 0; - while (fgets(buf, sizeof(buf), stdin) != NULL) { - len = strlen(buf); - cp = buf + len - 1; - c = *cp; - *cp = '\0'; - if (!doflush) - err = testit(buf); - if (err) - break; - doflush = c == '\n' ? 0 : 1; - } - } - - /* - * Try path(s) listed on command-line. - */ - while (optind < argc) { - path = argv[optind++]; - err = testit(path); - if (err) - break; - } - - /* - * Clean up. 
- */ - _test_sysio_shutdown(); - - return err; -} - -int -testit(const char *path) -{ - DIR *d; - struct dirent *de; - - printf("testing directory functions on %s\n", path); - - if ((d = SYSIO_INTERFACE_NAME(opendir)(path)) == NULL) { - perror(path); - return errno; - } - - while ((de = SYSIO_INTERFACE_NAME(readdir)(d)) != NULL) - printf("\t %s: ino %lu off %lu type %u\n", - de->d_name, (unsigned long )de->d_ino, - (unsigned long )de->d_off, (int )de->d_type); - - if (SYSIO_INTERFACE_NAME(closedir)(d)) { - perror("closedir"); - return errno; - } - - return 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: test_stddir [<path> ...]\n"); - - exit(1); -} diff --git a/libsysio/tests/test_symlink.c b/libsysio/tests/test_symlink.c deleted file mode 100644 index a8e134a27fc1df07899eb1999a7d52255926a6ee..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_symlink.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2006 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#if 0 -#include <dirent.h> -#endif -#include <sys/uio.h> -#include <sys/queue.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Test soft links - * - * Usage: symlink oldpath newpath - * - */ - -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int i; - int err; - int n; - extern int _test_sysio_startup(void); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. - */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - n = argc - optind; - if (n < 2) usage(); - - /* - * Try paths listed on command-line. - */ - while (optind < argc) { - const char *old, *new; - struct stat stbuf; - - old = argv[optind++]; - new = argv[optind++]; - if ((err = SYSIO_INTERFACE_NAME(symlink)(old, new)) != 0) { - perror("link"); - break; - } - if ((err = SYSIO_INTERFACE_NAME(lstat)(new, &stbuf)) != 0) { - perror(new); - break; - } - } - - /* - * Clean up. 
- */ - _test_sysio_shutdown(); - - return err ? -1 : 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: symlink" - " oldpath newpath\n"); - - exit(1); -} diff --git a/libsysio/tests/test_unlink.c b/libsysio/tests/test_unlink.c deleted file mode 100644 index 3b02a9f1d3af66e57390568a4a8a7741845537f5..0000000000000000000000000000000000000000 --- a/libsysio/tests/test_unlink.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * This Cplant(TM) source code is the property of Sandia National - * Laboratories. - * - * This Cplant(TM) source code is copyrighted by Sandia National - * Laboratories. - * - * The redistribution of this Cplant(TM) source code is subject to the - * terms of the GNU Lesser General Public License - * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) - * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. - * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive - * license for use of this work by or on behalf of the US Government. - * Export of this program may require a license from the United States - * Government. - */ - -/* - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Questions or comments about this library should be sent to: - * - * Lee Ward - * Sandia National Laboratories, New Mexico - * P.O. 
Box 5800 - * Albuquerque, NM 87185-1110 - * - * lee@sandia.gov - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <sys/types.h> -#if 0 -#include <dirent.h> -#endif -#include <sys/uio.h> -#include <getopt.h> - -#if defined(SYSIO_LABEL_NAMES) -#include "sysio.h" -#endif -#include "xtio.h" -#include "test.h" - -/* - * Unlink files. - * - * Usage: unlink [path...] - * - * Without any path arguments, the program unlinks files named - * by the ocmmand line args. - */ - -static int unlinkit(const char *path); -static void usage(void); - -int -main(int argc, char *const argv[]) -{ - int i; - int err; - int n; - extern int _test_sysio_startup(void); - - /* - * Parse command line arguments. - */ - while ((i = getopt(argc, argv, "")) != -1) - switch (i) { - - default: - usage(); - } - - /* - * Init sysio lib. - */ - err = _test_sysio_startup(); - if (err) { - errno = -err; - perror("sysio startup"); - exit(1); - } - - n = argc - optind; - - /* - * Try path(s) listed on command-line. - */ - while (optind < argc) { - const char *path; - - path = argv[optind++]; - (void )unlinkit(path); - } - - /* - * If no command-line arguments, read from stdin until EOF. - */ - if (!n) { - int doflush; - static char buf[4096]; - size_t len; - char *cp; - char c; - - doflush = 0; - while (fgets(buf, sizeof(buf), stdin) != NULL) { - len = strlen(buf); - cp = buf + len - 1; - c = *cp; - *cp = '\0'; - if (!doflush) - unlinkit(buf); - doflush = c == '\n' ? 0 : 1; - } - } - - /* - * Clean up. 
- */ - _test_sysio_shutdown(); - - return 0; -} - -static int -unlinkit(const char *path) -{ - - if (SYSIO_INTERFACE_NAME(unlink)(path) != 0) { - perror(path); - return -1; - } - - return 0; -} - -static void -usage() -{ - - (void )fprintf(stderr, - "Usage: unlink" - " [<path> ...\n]"); - - exit(1); -} diff --git a/lnet/.cvsignore b/lnet/.cvsignore deleted file mode 100644 index f30d8625f4ae34b5f3c6e405883d32db59ae42dc..0000000000000000000000000000000000000000 --- a/lnet/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -Kernelenv -Makefile -autoMakefile -autoMakefile.in -aclocal.m4 -autom4te.cache -config.log -config.status -configure -.*.cmd -.depend diff --git a/lnet/ChangeLog b/lnet/ChangeLog deleted file mode 100644 index 0b126ce0be388d49755160593ac3a88931198569..0000000000000000000000000000000000000000 --- a/lnet/ChangeLog +++ /dev/null @@ -1,346 +0,0 @@ -2007-04-23 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.11 / 1.6.1 - * Support for networks: - socklnd - kernels up to 2.6.16 - qswlnd - Qsnet kernel modules 5.20 and later - openiblnd - IbGold 1.8.2 - o2iblnd - OFED 1.1 - viblnd - Voltaire ibhost 3.4.5 and later - ciblnd - Topspin 3.2.0 - iiblnd - Infiniserv 3.3 + PathBits patch - gmlnd - GM 2.1.22 and later - mxlnd - MX 1.2.1 or later - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - * bug fixes - ------------------------------------------------------------------------------- - -2007-04-01 Cluster File Systems, Inc. 
<info@clusterfs.com> - * version 1.4.10 / 1.6.0 - * Support for networks: - socklnd - kernels up to 2.6.16 - qswlnd - Qsnet kernel modules 5.20 and later - openiblnd - IbGold 1.8.2 - o2iblnd - OFED 1.1 - viblnd - Voltaire ibhost 3.4.5 and later - ciblnd - Topspin 3.2.0 - iiblnd - Infiniserv 3.3 + PathBits patch - gmlnd - GM 2.1.22 and later - mxlnd - MX 1.2.1 or later - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - * bug fixes - -Severity : major -Frequency : rare -Bugzilla : 12455 -Description: A race in kernel ptllnd between deleting a peer and posting - new communications for it could hang communications - - manifesting as "Unexpectedly long timeout" messages. - -Severity : major -Frequency : rare -Bugzilla : 12432 -Description: Kernel ptllnd lock ordering issue could hang a node. - -Severity : major -Frequency : rare -Bugzilla : 12016 -Description: node crash on socket teardown race - -Severity : minor -Frequency : 'lctl peer_list' issued on a mx net -Bugzilla : 12237 -Description: Enable lctl's peer_list for MXLND - -Severity : major -Frequency : after Ptllnd timeouts and portals congestion -Bugzilla : 11659 -Description: Credit overflows -Details : This was a bug in ptllnd connection establishment. The fix - implements better peer stamps to disambiguate connection - establishment and ensure both peers enter the credit flow - state machine consistently. - -Severity : major -Frequency : rare -Bugzilla : 11394 -Description: kptllnd didn't propagate some network errors up to LNET -Details : This bug was spotted while investigating 11394. The fix - ensures network errors on sends and bulk transfers are - propagated to LNET/lustre correctly. - -Severity : enhancement -Bugzilla : 10316 -Description: Fixed console chatter in case of -ETIMEDOUT. - -Severity : enhancement -Bugzilla : 11684 -Description: Added D_NETTRACE for recording network packet history - (initially only for ptllnd). 
Also a separate userspace - ptllnd facility to gather history which should really be - covered by D_NETTRACE too, if only CDEBUG recorded history in - userspace. - -Severity : major -Frequency : rare -Bugzilla : 11616 -Description: o2iblnd handle early RDMA_CM_EVENT_DISCONNECTED. -Details : If the fabric is lossy, an RDMA_CM_EVENT_DISCONNECTED - callback can occur before a connection has actually been - established. This caused an assertion failure previously. - -Severity : enhancement -Bugzilla : 11094 -Description: Multiple instances for o2iblnd -Details : Allow multiple instances of o2iblnd to enable networking over - multiple HCAs and routing between them. - -Severity : major -Bugzilla : 11201 -Description: lnet deadlock in router_checker -Details : turned ksnd_connd_lock, ksnd_reaper_lock, and ksock_net_t:ksnd_lock - into BH locks to eliminate potential deadlock caused by - ksocknal_data_ready() preempting code holding these locks. - -Severity : major -Bugzilla : 11126 -Description: Millions of failed socklnd connection attempts cause a very slow FS -Details : added a new route flag ksnr_scheduled to distinguish from - ksnr_connecting, so that a peer connection request is only turned - down for race concerns when an active connection to the same peer - is under progress (instead of just being scheduled). - ------------------------------------------------------------------------------- - -2007-02-09 Cluster File Systems, Inc. 
<info@clusterfs.com> - * version 1.4.9 - * Support for networks: - socklnd - kernels up to 2.6.16 - qswlnd - Qsnet kernel modules 5.20 and later - openiblnd - IbGold 1.8.2 - o2iblnd - OFED 1.1 - viblnd - Voltaire ibhost 3.4.5 and later - ciblnd - Topspin 3.2.0 - iiblnd - Infiniserv 3.3 + PathBits patch - gmlnd - GM 2.1.22 and later - mxlnd - MX 1.2.1 or later - ptllnd - Portals 3.3 / UNICOS/lc 1.5.x, 2.0.x - * bug fixes - -Severity : major on XT3 -Bugzilla : none -Description: libcfs overwrites /proc/sys/portals -Details : libcfs created a symlink from /proc/sys/portals to - /proc/sys/lnet for backwards compatibility. This is no - longer required and makes the Cray portals /proc variables - inaccessible. - -Severity : minor -Bugzilla : 11312 -Description: OFED FMR API change -Details : This changes parameter usage to reflect a change in - ib_fmr_pool_map_phys() between OFED 1.0 and OFED 1.1. Note - that FMR support is only used in experimental versions of the - o2iblnd - this change does not affect standard usage at all. - -Severity : enhancement -Bugzilla : 11245 -Description: new ko2iblnd module parameter: ib_mtu -Details : the default IB MTU of 2048 performs badly on 23108 Tavor - HCAs. You can avoid this problem by setting the MTU to 1024 - using this module parameter. - -Severity : enhancement -Bugzilla : 11118/11620 -Description: ptllnd small request message buffer alignment fix -Details : Set the PTL_MD_LOCAL_ALIGN8 option on small message receives. - Round up small message size on sends in case this option - is not supported. 11620 was a defect in the initial - implementation which effectively asserted all peers had to be - running the correct protocol version which was fixed by always - NAK-ing such requests and handling any misalignments they - introduce. 
- -Severity : minor -Frequency : rarely -Description: When kib(nal|lnd)_del_peer() is called upon a peer whose - ibp_tx_queue is not empty, kib(nal|lnd)_destroy_peer()'s - 'LASSERT(list_empty(&peer->ibp_tx_queue))' will fail. - -Severity : enhancement -Bugzilla : 11250 -Description: Patchless ZC(zero copy) socklnd -Details : New protocol for socklnd, socklnd can support zero copy without - kernel patch, it's compatible with old socklnd. Checksum is - moved from tunables to modparams. - -Severity : minor -Frequency : rarely -Description: When ksocknal_del_peer() is called upon a peer whose - ksnp_tx_queue is not empty, ksocknal_destroy_peer()'s - 'LASSERT(list_empty(&peer->ksnp_tx_queue))' will fail. - -Severity : normal -Frequency : when ptlrpc is under heavy use and runs out of request buffer -Bugzilla : 11318 -Description: In lnet_match_blocked_msg(), md can be used without holding a - ref on it. - -Severity : minor -Frequency : very rarely -Bugzilla : 10727 -Description: If ksocknal_lib_setup_sock() fails, a ref on peer is lost. - If connd connects a route which has been closed by - ksocknal_shutdown(), ksocknal_create_routes() may create new - routes which hold references on the peer, causing shutdown - process to wait for peer to disappear forever. - -Severity : enhancement -Bugzilla : 11234 -Description: Dump XT3 portals traces on kptllnd timeout -Details : Set the kptllnd module parameter "ptltrace_on_timeout=1" to - dump Cray portals debug traces to a file. The kptllnd module - parameter "ptltrace_basename", default "/tmp/lnet-ptltrace", - is the basename of the dump file. - -Severity : major -Frequency : infrequent -Bugzilla : 11308 -Description: kernel ptllnd fix bug in connection re-establishment -Details : Kernel ptllnd could produce protocol errors e.g. illegal - matchbits and/or violate the credit flow protocol when trying - to re-establish a connection with a peer after an error or - timeout. 
- -Severity : enhancement -Bugzilla : 10316 -Description: Allow /proc/sys/lnet/debug to be set symbolically -Details : Allow debug and subsystem debug values to be read/set by name - in addition to numerically, for ease of use. - -Severity : normal -Frequency : only in configurations with LNET routers -Bugzilla : 10316 -Description: routes automatically marked down and recovered -Details : In configurations with LNET routers if a router fails routers - now actively try to recover routes that are down, unless they - are marked down by an administrator. - ------------------------------------------------------------------------------- - -2006-12-09 Cluster File Systems, Inc. <info@clusterfs.com> - -Severity : critical -Frequency : very rarely, in configurations with LNET routers and TCP -Bugzilla : 10889 -Description: incorrect data written to files on OSTs -Details : In certain high-load conditions incorrect data may be written - to files on the OST when using TCP networks. - ------------------------------------------------------------------------------- - -2006-07-31 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.7 - - rework CDEBUG messages rate-limiting mechanism b=10375 - - add per-socket tunables for socklnd if the kernel is patched b=10327 - ------------------------------------------------------------------------------- - -2006-02-15 Cluster File Systems, Inc. 
<info@clusterfs.com> - * version 1.4.6 - - fix use of portals/lnet pid to avoid dropping RPCs b=10074 - - iiblnd wasn't mapping all memory, resulting in comms errors b=9776 - - quiet LNET startup LNI message for liblustre b=10128 - - Better console error messages if 'ip2nets' can't match an IP address - - Fixed overflow/use-before-set bugs in linux-time.h - - Fixed ptllnd bug that wasn't initialising rx descriptors completely - - LNET teardown failed an assertion about the route table being empty - - Fixed a crash in LNetEQPoll(<invalid handle>) - - Future protocol compatibility work (b_rls146_lnetprotovrsn) - - improve debug message for liblustre/Catamount nodes (b=10116) - -2005-10-10 Cluster File Systems, Inc. <info@clusterfs.com> - * Configuration change for the XT3 - The PTLLND is now used to run Lustre over Portals on the XT3. - The configure option(s) --with-cray-portals are no longer - used. Rather --with-portals=<path-to-portals-includes> is - used to enable building on the XT3. In addition to enable - XT3 specific features the option --enable-cray-xt3 must be - used. - -2005-10-10 Cluster File Systems, Inc. <info@clusterfs.com> - * Portals has been removed, replaced by LNET. - LNET is new networking infrastructure for Lustre, it includes a - reorganized network configuration mode (see the user - documentation for full details) as well as support for routing - between different network fabrics. Lustre Networking Devices - (LNDS) for the supported network fabrics have also been created - for this new infrastructure. - -2005-08-08 Cluster File Systems, Inc. <info@clusterfs.com> - * version 1.4.4 - * bug fixes - -Severity : major -Frequency : rare (large Voltaire clusters only) -Bugzilla : 6993 -Description: the default number of reserved transmit descriptors was too low - for some large clusters -Details : As a workaround, the number was increased. A proper fix includes - a run-time tunable. - -2005-06-02 Cluster File Systems, Inc. 
<info@clusterfs.com> - * version 1.4.3 - * bug fixes - -Severity : major -Frequency : occasional (large-scale events, cluster reboot, network failure) -Bugzilla : 6411 -Description: too many error messages on console obscure actual problem and - can slow down/panic server, or cause recovery to fail repeatedly -Details : enable rate-limiting of console error messages, and some messages - that were console errors now only go to the kernel log - -Severity : enhancement -Bugzilla : 1693 -Description: add /proc/sys/portals/catastrophe entry which will report if - that node has previously LBUGged - -2005-04-06 Cluster File Systems, Inc. <info@clusterfs.com> - * bugs - - update gmnal to use PTL_MTU, fix module refcounting (b=5786) - -2005-04-04 Cluster File Systems, Inc. <info@clusterfs.com> - * bugs - - handle error return code in kranal_check_fma_rx() (5915,6054) - -2005-02-04 Cluster File Systems, Inc. <info@clusterfs.com> - * miscellania - - update vibnal (Voltaire IB NAL) - - update gmnal (Myrinet NAL), gmnalid - -2005-02-04 Eric Barton <eeb@bartonsoftware.com> - - * Landed portals:b_port_step as follows... - - - removed CFS_DECL_SPIN* - just use 'spinlock_t' and initialise with spin_lock_init() - - - removed CFS_DECL_MUTEX* - just use 'struct semaphore' and initialise with init_mutex() - - - removed CFS_DECL_RWSEM* - just use 'struct rw_semaphore' and initialise with init_rwsem() - - - renamed cfs_sleep_chan -> cfs_waitq - cfs_sleep_link -> cfs_waitlink - - - fixed race in linux version of arch-independent socknal - (the ENOMEM/EAGAIN decision). - - - Didn't fix problems in Darwin version of arch-independent socknal - (resetting socket callbacks, eager ack hack, ENOMEM/EAGAIN decision) - - - removed libcfs types from non-socknal header files (only some types - in the header files had been changed; the .c files hadn't been - updated at all). 
diff --git a/lnet/Kernelenv.in b/lnet/Kernelenv.in deleted file mode 100644 index 59eda309e80c03d4b663988e5491c9a5b8df2796..0000000000000000000000000000000000000000 --- a/lnet/Kernelenv.in +++ /dev/null @@ -1,6 +0,0 @@ -EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include -# lnet/utils/debug.c wants <linux/version.h> from userspace. sigh. -HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS) -LIBREADLINE := @LIBREADLINE@ -# 2.5's makefiles aren't nice to cross dir libraries in host programs -PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/lnet/Kernelenv.mk b/lnet/Kernelenv.mk deleted file mode 100644 index d973e5da2b76f671fbff82188c64dc90bc0aa0c0..0000000000000000000000000000000000000000 --- a/lnet/Kernelenv.mk +++ /dev/null @@ -1,4 +0,0 @@ -EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include -HOSTCFLAGS := $(EXTRA_CFLAGS) -# the kernel doesn't want us to build archives for host binaries :/ -PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/lnet/LICENSE b/lnet/LICENSE deleted file mode 100644 index 92728f4d300d2b6d965a4f0aba46552b1831c118..0000000000000000000000000000000000000000 --- a/lnet/LICENSE +++ /dev/null @@ -1,363 +0,0 @@ -Each file in this distribution should contain a header stating the -copyright owner(s), and the licensing terms for that module. Some -files are not eligible for copyright protection, and contain neither. - -All files in this subtree are licensed under the terms and conditions -of the GNU General Public License version 2. - -Reproduced below is the GPL v2, and Linus's clarifying statement from -the Linux kernel source code: - ----------------------------------------- - - NOTE! This copyright does *not* cover user programs that use kernel - services by normal system calls - this is merely considered normal use - of the kernel, and does *not* fall under the heading of "derived work". 
- Also note that the GPL below is copyrighted by the Free Software - Foundation, but the instance of code that it refers to (the Linux - kernel) is copyrighted by me and others who actually wrote it. - - Linus Torvalds - ----------------------------------------- - - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. 
You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. 
The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. 
(Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) 19yy <name of author> - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19yy name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. 
- - <signature of Ty Coon>, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. diff --git a/lnet/Makefile.in b/lnet/Makefile.in deleted file mode 100644 index 553578cf8f4bcfe19d43b091f983add247c3c90c..0000000000000000000000000000000000000000 --- a/lnet/Makefile.in +++ /dev/null @@ -1,8 +0,0 @@ -subdir-m += libcfs - -lnet-subdirs += lnet -lnet-subdirs += klnds -lnet-subdirs += tests -subdir-m += $(lnet-subdirs) - -@INCLUDE_RULES@ diff --git a/lnet/autoMakefile.am b/lnet/autoMakefile.am deleted file mode 100644 index 27a60a8b63d542c1e153b92270e1caf72096f9ad..0000000000000000000000000000000000000000 --- a/lnet/autoMakefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -SUBDIRS = libcfs lnet klnds ulnds tests doc utils include \ - autoconf - -sources: - $(MAKE) sources -C libcfs diff --git a/lnet/autoconf/.cvsignore b/lnet/autoconf/.cvsignore deleted file mode 100644 index 282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/lnet/autoconf/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/autoconf/Makefile.am b/lnet/autoconf/Makefile.am deleted file mode 100644 index 171634a23ad0c9b9cb19344224f42b48daffd317..0000000000000000000000000000000000000000 --- a/lnet/autoconf/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST := lustre-lnet.m4 diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 deleted file mode 100644 index d469e7cb6ca72c25f0fcfb5df3fa22e468d3cd5e..0000000000000000000000000000000000000000 --- a/lnet/autoconf/lustre-lnet.m4 +++ /dev/null @@ -1,1373 +0,0 @@ -# -# LN_CONFIG_MAX_PAYLOAD -# -# configure maximum payload -# -AC_DEFUN([LN_CONFIG_MAX_PAYLOAD], -[AC_MSG_CHECKING([for non-default maximum LNET payload]) -AC_ARG_WITH([max-payload-mb], - AC_HELP_STRING([--with-max-payload-mb=MBytes], - [set maximum lnet payload in MBytes]), - [ - AC_MSG_RESULT([$with_max_payload_mb]) - LNET_MAX_PAYLOAD_MB=$with_max_payload_mb - LNET_MAX_PAYLOAD="(($with_max_payload_mb)<<20)" - ], [ - AC_MSG_RESULT([no]) - LNET_MAX_PAYLOAD="LNET_MTU" - ]) - AC_DEFINE_UNQUOTED(LNET_MAX_PAYLOAD, $LNET_MAX_PAYLOAD, - [Max LNET payload]) -]) - -# -# LN_CHECK_GCC_VERSION -# -# Check compiler version -# -AC_DEFUN([LN_CHECK_GCC_VERSION], -[AC_MSG_CHECKING([compiler version]) -PTL_CC_VERSION=`$CC --version | awk '/^gcc/{print $ 3}'` -PTL_MIN_CC_VERSION="3.2.2" -v2n() { - awk -F. 
'{printf "%d\n", (($ 1)*100+($ 2))*100+($ 3)}' -} -if test -z "$PTL_CC_VERSION" -o \ - `echo $PTL_CC_VERSION | v2n` -ge `echo $PTL_MIN_CC_VERSION | v2n`; then - AC_MSG_RESULT([ok]) -else - AC_MSG_RESULT([Buggy compiler found]) - AC_MSG_ERROR([Need gcc version >= $PTL_MIN_CC_VERSION]) -fi -]) - -# -# LN_CONFIG_CDEBUG -# -# whether to enable various libcfs debugs (CDEBUG, ENTRY/EXIT, LASSERT, etc.) -# -AC_DEFUN([LN_CONFIG_CDEBUG], -[ -AC_MSG_CHECKING([whether to enable CDEBUG, CWARN]) -AC_ARG_ENABLE([libcfs_cdebug], - AC_HELP_STRING([--disable-libcfs-cdebug], - [disable libcfs CDEBUG, CWARN]), - [],[enable_libcfs_cdebug='yes']) -AC_MSG_RESULT([$enable_libcfs_cdebug]) -if test x$enable_libcfs_cdebug = xyes; then - AC_DEFINE(CDEBUG_ENABLED, 1, [enable libcfs CDEBUG, CWARN]) -else - AC_DEFINE(CDEBUG_ENABLED, 0, [disable libcfs CDEBUG, CWARN]) -fi - -AC_MSG_CHECKING([whether to enable ENTRY/EXIT]) -AC_ARG_ENABLE([libcfs_trace], - AC_HELP_STRING([--disable-libcfs-trace], - [disable libcfs ENTRY/EXIT]), - [],[enable_libcfs_trace='yes']) -AC_MSG_RESULT([$enable_libcfs_trace]) -if test x$enable_libcfs_trace = xyes; then - AC_DEFINE(CDEBUG_ENTRY_EXIT, 1, [enable libcfs ENTRY/EXIT]) -else - AC_DEFINE(CDEBUG_ENTRY_EXIT, 0, [disable libcfs ENTRY/EXIT]) -fi - -AC_MSG_CHECKING([whether to enable LASSERT, LASSERTF]) -AC_ARG_ENABLE([libcfs_assert], - AC_HELP_STRING([--disable-libcfs-assert], - [disable libcfs LASSERT, LASSERTF]), - [],[enable_libcfs_assert='yes']) -AC_MSG_RESULT([$enable_libcfs_assert]) -if test x$enable_libcfs_assert = xyes; then - AC_DEFINE(LIBCFS_DEBUG, 1, [enable libcfs LASSERT, LASSERTF]) -fi -]) - -# -# LN_CONFIG_AFFINITY -# -# check if cpu affinity is available/wanted -# -AC_DEFUN([LN_CONFIG_AFFINITY], -[AC_ARG_ENABLE([affinity], - AC_HELP_STRING([--disable-affinity], - [disable process/irq affinity]), - [],[enable_affinity='yes']) - -AC_MSG_CHECKING([for CPU affinity support]) -if test x$enable_affinity = xno ; then - AC_MSG_RESULT([no (by request)]) -else - 
LB_LINUX_TRY_COMPILE([ - #include <linux/sched.h> - ],[ - struct task_struct t; - #if HAVE_CPUMASK_T - cpumask_t m; - #else - unsigned long m; - #endif - set_cpus_allowed(&t, m); - ],[ - AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support]) - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no (no kernel support)]) - ]) -fi -]) - -# -# LN_CONFIG_PORTALS -# -# configure support for Portals -# -AC_DEFUN([LN_CONFIG_PORTALS], -[AC_MSG_CHECKING([for portals]) -AC_ARG_WITH([portals], - AC_HELP_STRING([--with-portals=path], - [set path to portals]), - [ - case $with_portals in - no) ENABLEPORTALS=0 - ;; - *) PORTALS="${with_portals}" - ENABLEPORTALS=1 - ;; - esac - - ], [ - ENABLEPORTALS=0 - ]) -PTLLNDCPPFLAGS="" -if test $ENABLEPORTALS -eq 0; then - AC_MSG_RESULT([no]) -elif test ! \( -f ${PORTALS}/include/portals/p30.h \); then - AC_MSG_RESULT([no]) - AC_MSG_ERROR([bad --with-portals path]) -else - AC_MSG_RESULT([$PORTALS]) - PTLLNDCPPFLAGS="-I${PORTALS}/include" -fi -AC_SUBST(PTLLNDCPPFLAGS) -]) - -# -# LN_CONFIG_BACKOFF -# -# check if tunable tcp backoff is available/wanted -# -AC_DEFUN([LN_CONFIG_BACKOFF], -[AC_MSG_CHECKING([for tunable backoff TCP support]) -AC_ARG_ENABLE([backoff], - AC_HELP_STRING([--disable-backoff], - [disable socknal tunable backoff]), - [],[enable_backoff='yes']) -if test x$enable_backoff = xno ; then - AC_MSG_RESULT([no (by request)]) -else - BOCD="`grep -c TCP_BACKOFF $LINUX/include/linux/tcp.h`" - if test "$BOCD" != 0 ; then - AC_DEFINE(SOCKNAL_BACKOFF, 1, [use tunable backoff TCP]) - AC_MSG_RESULT(yes) - else - AC_MSG_RESULT([no (no kernel support)]) - fi -fi -]) - -# -# LN_CONFIG_PANIC_DUMPLOG -# -# check if tunable panic_dumplog is wanted -# -AC_DEFUN([LN_CONFIG_PANIC_DUMPLOG], -[AC_MSG_CHECKING([for tunable panic_dumplog support]) -AC_ARG_ENABLE([panic_dumplog], - AC_HELP_STRING([--enable-panic_dumplog], - [enable panic_dumplog]), - [],[enable_panic_dumplog='no']) -if test x$enable_panic_dumplog = xyes ; then - 
AC_DEFINE(LNET_DUMP_ON_PANIC, 1, [use dumplog on panic]) - AC_MSG_RESULT([yes (by request)]) -else - AC_MSG_RESULT([no]) -fi -]) - -# -# LN_CONFIG_PTLLND -# -# configure support for Portals LND -# -AC_DEFUN([LN_CONFIG_PTLLND], -[ -if test -z "$ENABLEPORTALS"; then - LN_CONFIG_PORTALS -fi - -AC_MSG_CHECKING([whether to build the kernel portals LND]) - -PTLLND="" -if test $ENABLEPORTALS -ne 0; then - AC_MSG_RESULT([yes]) - PTLLND="ptllnd" -else - AC_MSG_RESULT([no]) -fi -AC_SUBST(PTLLND) -]) - -# -# LN_CONFIG_UPTLLND -# -# configure support for Portals LND -# -AC_DEFUN([LN_CONFIG_UPTLLND], -[ -if test -z "$ENABLEPORTALS"; then - LN_CONFIG_PORTALS -fi - -AC_MSG_CHECKING([whether to build the userspace portals LND]) - -UPTLLND="" -if test $ENABLEPORTALS -ne 0; then - AC_MSG_RESULT([yes]) - UPTLLND="ptllnd" -else - AC_MSG_RESULT([no]) -fi -AC_SUBST(UPTLLND) -]) - -# -# LN_CONFIG_USOCKLND -# -# configure support for userspace TCP/IP LND -# -AC_DEFUN([LN_CONFIG_USOCKLND], -[AC_MSG_CHECKING([whether to build usocklnd]) -AC_ARG_ENABLE([usocklnd], - AC_HELP_STRING([--disable-usocklnd], - [disable usocklnd]), - [],[enable_usocklnd='yes']) - -if test x$enable_usocklnd = xyes ; then - if test "$ENABLE_LIBPTHREAD" = "yes" ; then - AC_MSG_RESULT([yes]) - USOCKLND="usocklnd" - else - AC_MSG_RESULT([no (libpthread not present or disabled)]) - USOCKLND="" - fi -else - AC_MSG_RESULT([no (disabled explicitly)]) - USOCKLND="" -fi -AC_SUBST(USOCKLND) -]) - -# -# LN_CONFIG_QUADRICS -# -# check if quadrics support is in this kernel -# -AC_DEFUN([LN_CONFIG_QUADRICS], -[AC_MSG_CHECKING([for QsNet sources]) -AC_ARG_WITH([qsnet], - AC_HELP_STRING([--with-qsnet=path], - [set path to qsnet source (default=$LINUX)]), - [QSNET=$with_qsnet], - [QSNET=$LINUX]) -AC_MSG_RESULT([$QSNET]) - -AC_MSG_CHECKING([if quadrics kernel headers are present]) -if test -d $QSNET/drivers/net/qsnet ; then - AC_MSG_RESULT([yes]) - QSWLND="qswlnd" - AC_MSG_CHECKING([for multirail EKC]) - if test -f 
$QSNET/include/elan/epcomms.h; then - AC_MSG_RESULT([supported]) - QSWCPPFLAGS="-I$QSNET/include -DMULTIRAIL_EKC=1" - else - AC_MSG_RESULT([not supported]) - AC_MSG_ERROR([Need multirail EKC]) - fi - - if test x$QSNET = x$LINUX ; then - LB_LINUX_CONFIG([QSNET],[],[ - LB_LINUX_CONFIG([QSNET_MODULE],[],[ - AC_MSG_WARN([QSNET is not enabled in this kernel; not building qswlnd.]) - QSWLND="" - QSWCPPFLAGS="" - ]) - ]) - fi -else - AC_MSG_RESULT([no]) - QSWLND="" - QSWCPPFLAGS="" -fi -AC_SUBST(QSWCPPFLAGS) -AC_SUBST(QSWLND) -]) - -# -# LN_CONFIG_GM -# -# check if GM support is available -# -AC_DEFUN([LN_CONFIG_GM],[ -AC_MSG_CHECKING([whether to enable GM support]) -AC_ARG_WITH([gm], - AC_HELP_STRING([--with-gm=path-to-gm-source-tree], - [build gmlnd against path]), - [ - case $with_gm in - no) ENABLE_GM=0 - ;; - *) ENABLE_GM=1 - GM_SRC="$with_gm" - ;; - esac - ],[ - ENABLE_GM=0 - ]) -AC_ARG_WITH([gm-install], - AC_HELP_STRING([--with-gm-install=path-to-gm-install-tree], - [say where GM has been installed]), - [ - GM_INSTALL=$with_gm_install - ],[ - GM_INSTALL="/opt/gm" - ]) -if test $ENABLE_GM -eq 0; then - AC_MSG_RESULT([no]) -else - AC_MSG_RESULT([yes]) - - GMLND="gmlnd" - GMCPPFLAGS="-I$GM_SRC/include -I$GM_SRC/drivers -I$GM_SRC/drivers/linux/gm" - - if test -f $GM_INSTALL/lib/libgm.a -o \ - -f $GM_INSTALL/lib64/libgm.a; then - GMLIBS="-L$GM_INSTALL/lib -L$GM_INSTALL/lib64" - else - AC_MSG_ERROR([Cant find GM libraries under $GM_INSTALL]) - fi - - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$GMCPPFLAGS -DGM_KERNEL $EXTRA_KCFLAGS" - - AC_MSG_CHECKING([that code using GM compiles with given path]) - LB_LINUX_TRY_COMPILE([ - #define GM_STRONG_TYPES 1 - #ifdef VERSION - #undef VERSION - #endif - #include "gm.h" - #include "gm_internal.h" - ],[ - struct gm_port *port = NULL; - gm_recv_event_t *rxevent = gm_blocking_receive_no_spin(port); - return 0; - ],[ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no]) - AC_MSG_ERROR([Bad --with-gm path]) - ]) - - 
AC_MSG_CHECKING([that GM has gm_register_memory_ex_phys()]) - LB_LINUX_TRY_COMPILE([ - #define GM_STRONG_TYPES 1 - #ifdef VERSION - #undef VERSION - #endif - #include "gm.h" - #include "gm_internal.h" - ],[ - gm_status_t gmrc; - struct gm_port *port = NULL; - gm_u64_t phys = 0; - gm_up_t pvma = 0; - - gmrc = gm_register_memory_ex_phys(port, phys, 100, pvma); - return 0; - ],[ - AC_MSG_RESULT([yes]) - ],[ - AC_MSG_RESULT([no. -Please patch the GM sources as follows... - cd $GM_SRC - patch -p0 < $PWD/lnet/klnds/gmlnd/gm-reg-phys.patch -...then rebuild and re-install them]) - AC_MSG_ERROR([Can't build GM without gm_register_memory_ex_phys()]) - ]) - - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(GMCPPFLAGS) -AC_SUBST(GMLIBS) -AC_SUBST(GMLND) -]) - - -# -# LN_CONFIG_MX -# -AC_DEFUN([LN_CONFIG_MX], -[AC_MSG_CHECKING([whether to enable Myrinet MX support]) -# set default -MXPATH="/opt/mx" -AC_ARG_WITH([mx], - AC_HELP_STRING([--with-mx=path], - [build mxlnd against path]), - [ - case $with_mx in - yes) ENABLEMX=2 - ;; - no) ENABLEMX=0 - ;; - *) MXPATH=$with_mx - ENABLEMX=3 - ;; - esac - ],[ - ENABLEMX=1 - ]) -if test $ENABLEMX -eq 0; then - AC_MSG_RESULT([disabled]) -elif test ! 
\( -f ${MXPATH}/include/myriexpress.h -a \ - -f ${MXPATH}/include/mx_kernel_api.h -a \ - -f ${MXPATH}/include/mx_pin.h \); then - AC_MSG_RESULT([no]) - case $ENABLEMX in - 1) ;; - 2) AC_MSG_ERROR([Myrinet MX kernel headers not present]);; - 3) AC_MSG_ERROR([bad --with-mx path]);; - *) AC_MSG_ERROR([internal error]);; - esac -else - MXCPPFLAGS="-I$MXPATH/include" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $MXCPPFLAGS" - MXLIBS="-L$MXPATH/lib" - LB_LINUX_TRY_COMPILE([ - #define MX_KERNEL 1 - #include <mx_extensions.h> - #include <myriexpress.h> - ],[ - mx_endpoint_t end; - mx_status_t status; - mx_request_t request; - int result; - - mx_init(); - mx_open_endpoint(MX_ANY_NIC, MX_ANY_ENDPOINT, 0, NULL, 0, &end); - mx_register_unexp_handler(end, (mx_unexp_handler_t) NULL, NULL); - mx_wait_any(end, MX_INFINITE, 0LL, 0LL, &status, &result); - mx_iconnect(end, 0LL, 0, 0, 0, NULL, &request); - return 0; - ],[ - AC_MSG_RESULT([yes]) - MXLND="mxlnd" - ],[ - AC_MSG_RESULT([no]) - case $ENABLEMX in - 1) ;; - 2) AC_MSG_ERROR([can't compile with Myrinet MX kernel headers]);; - 3) AC_MSG_ERROR([can't compile with Myrinet MX headers under $MXPATH]);; - *) AC_MSG_ERROR([internal error]);; - esac - MXLND="" - MXCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(MXCPPFLAGS) -AC_SUBST(MXLIBS) -AC_SUBST(MXLND) -]) - - - -# -# LN_CONFIG_O2IB -# -AC_DEFUN([LN_CONFIG_O2IB],[ -AC_MSG_CHECKING([whether to enable OpenIB gen2 support]) -# set default -O2IBPATH="$LINUX/drivers/infiniband" -AC_ARG_WITH([o2ib], - AC_HELP_STRING([--with-o2ib=path], - [build o2iblnd against path]), - [ - case $with_o2ib in - yes) ENABLEO2IB=2 - ;; - no) ENABLEO2IB=0 - ;; - *) O2IBPATH=$with_o2ib - ENABLEO2IB=3 - ;; - esac - ],[ - ENABLEO2IB=1 - ]) -if test $ENABLEO2IB -eq 0; then - AC_MSG_RESULT([disabled]) -elif test ! 
\( -f ${O2IBPATH}/include/rdma/rdma_cm.h -a \ - -f ${O2IBPATH}/include/rdma/ib_cm.h -a\ - -f ${O2IBPATH}/include/rdma/ib_verbs.h -a\ - -f ${O2IBPATH}/include/rdma/ib_fmr_pool.h \); then - AC_MSG_RESULT([no]) - case $ENABLEO2IB in - 1) ;; - 2) AC_MSG_ERROR([kernel OpenIB gen2 headers not present]);; - 3) AC_MSG_ERROR([bad --with-o2ib path]);; - *) AC_MSG_ERROR([internal error]);; - esac -else - O2IBCPPFLAGS="-I$O2IBPATH/include" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $O2IBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <linux/version.h> - #if !HAVE_GFP_T - typedef int gfp_t; - #endif - #include <rdma/rdma_cm.h> - #include <rdma/ib_cm.h> - #include <rdma/ib_verbs.h> - #include <rdma/ib_fmr_pool.h> - ],[ - struct rdma_cm_id *cm_id; - struct rdma_conn_param conn_param; - struct ib_device_attr device_attr; - struct ib_qp_attr qp_attr; - struct ib_pool_fmr pool_fmr; - enum ib_cm_rej_reason rej_reason; - - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); - return PTR_ERR(cm_id); - ],[ - AC_MSG_RESULT([yes]) - O2IBLND="o2iblnd" - ],[ - AC_MSG_RESULT([no]) - case $ENABLEO2IB in - 1) ;; - 2) AC_MSG_ERROR([can't compile with kernel OpenIB gen2 headers]);; - 3) AC_MSG_ERROR([can't compile with OpenIB gen2 headers under $O2IBPATH]);; - *) AC_MSG_ERROR([internal error]);; - esac - O2IBLND="" - O2IBCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(O2IBCPPFLAGS) -AC_SUBST(O2IBLND) -]) - -# -# LN_CONFIG_OPENIB -# -# check for OpenIB in the kernel -AC_DEFUN([LN_CONFIG_OPENIB],[ -AC_MSG_CHECKING([whether to enable OpenIB support]) -# set default -OPENIBPATH="$LINUX/drivers/infiniband" -AC_ARG_WITH([openib], - AC_HELP_STRING([--with-openib=path], - [build openiblnd against path]), - [ - case $with_openib in - yes) ENABLEOPENIB=2 - ;; - no) ENABLEOPENIB=0 - ;; - *) OPENIBPATH="$with_openib" - ENABLEOPENIB=3 - ;; - esac - ],[ - ENABLEOPENIB=1 - ]) -if test $ENABLEOPENIB -eq 0; then - AC_MSG_RESULT([disabled]) -elif test ! 
\( -f ${OPENIBPATH}/include/ts_ib_core.h -a \ - -f ${OPENIBPATH}/include/ts_ib_cm.h -a\ - -f ${OPENIBPATH}/include/ts_ib_sa_client.h \); then - AC_MSG_RESULT([no]) - case $ENABLEOPENIB in - 1) ;; - 2) AC_MSG_ERROR([kernel OpenIB headers not present]);; - 3) AC_MSG_ERROR([bad --with-openib path]);; - *) AC_MSG_ERROR([internal error]);; - esac -else - case $ENABLEOPENIB in - 1|2) OPENIBCPPFLAGS="-I$OPENIBPATH/include -DIN_TREE_BUILD";; - 3) OPENIBCPPFLAGS="-I$OPENIBPATH/include";; - *) AC_MSG_RESULT([no]) - AC_MSG_ERROR([internal error]);; - esac - OPENIBCPPFLAGS="$OPENIBCPPFLAGS -DIB_NTXRXPARAMS=4" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $OPENIBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <ts_ib_core.h> - #include <ts_ib_cm.h> - #include <ts_ib_sa_client.h> - ],[ - struct ib_device_properties dev_props; - struct ib_cm_active_param cm_active_params; - tTS_IB_CLIENT_QUERY_TID tid; - int enum1 = IB_QP_ATTRIBUTE_STATE; - int enum2 = IB_ACCESS_LOCAL_WRITE; - int enum3 = IB_CQ_CALLBACK_INTERRUPT; - int enum4 = IB_CQ_PROVIDER_REARM; - return 0; - ],[ - AC_MSG_RESULT([yes]) - OPENIBLND="openiblnd" - ],[ - AC_MSG_RESULT([no]) - case $ENABLEOPENIB in - 1) ;; - 2) AC_MSG_ERROR([can't compile with kernel OpenIB headers]);; - 3) AC_MSG_ERROR([can't compile with OpenIB headers under $OPENIBPATH]);; - *) AC_MSG_ERROR([internal error]);; - esac - OPENIBLND="" - OPENIBCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(OPENIBCPPFLAGS) -AC_SUBST(OPENIBLND) -]) - -# -# LN_CONFIG_CIBLND -# -AC_DEFUN([LN_CONFIG_CIB],[ -AC_MSG_CHECKING([whether to enable Cisco/TopSpin IB support]) -# set default -CIBPATH="" -CIBLND="" -AC_ARG_WITH([cib], - AC_HELP_STRING([--with-cib=path], - [build ciblnd against path]), - [ - case $with_cib in - no) AC_MSG_RESULT([no]);; - *) CIBPATH="$with_cib" - if test -d "$CIBPATH"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([No directory $CIBPATH]) - fi;; - esac - ],[ - AC_MSG_RESULT([no]) 
- ]) -if test -n "$CIBPATH"; then - CIBCPPFLAGS="-I${CIBPATH}/ib/ts_api_ng/include -I${CIBPATH}/all/kernel_services/include -DUSING_TSAPI" - CIBCPPFLAGS="$CIBCPPFLAGS -DIB_NTXRXPARAMS=3" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $CIBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <ts_ib_core.h> - #include <ts_ib_cm.h> - #include <ts_ib_sa_client.h> - ],[ - struct ib_device_properties dev_props; - struct ib_cm_active_param cm_active_params; - tTS_IB_CLIENT_QUERY_TID tid; - int enum1 = TS_IB_QP_ATTRIBUTE_STATE; - int enum2 = TS_IB_ACCESS_LOCAL_WRITE; - int enum3 = TS_IB_CQ_CALLBACK_INTERRUPT; - int enum4 = TS_IB_CQ_PROVIDER_REARM; - return 0; - ],[ - CIBLND="ciblnd" - ],[ - AC_MSG_ERROR([can't compile ciblnd with given path]) - CIBCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(CIBCPPFLAGS) -AC_SUBST(CIBLND) -]) - -# -# LN_CONFIG_IIB -# -# check for infinicon infiniband support -# -AC_DEFUN([LN_CONFIG_IIB],[ -AC_MSG_CHECKING([whether to enable Infinicon support]) -# set default -IIBPATH="/usr/include" -AC_ARG_WITH([iib], - AC_HELP_STRING([--with-iib=path], - [build iiblnd against path]), - [ - case $with_iib in - yes) ENABLEIIB=2 - ;; - no) ENABLEIIB=0 - ;; - *) IIBPATH="${with_iib}/include" - ENABLEIIB=3 - ;; - esac - ],[ - ENABLEIIB=1 - ]) -if test $ENABLEIIB -eq 0; then - AC_MSG_RESULT([disabled]) -elif test ! 
\( -f ${IIBPATH}/linux/iba/ibt.h \); then - AC_MSG_RESULT([no]) - case $ENABLEIIB in - 1) ;; - 2) AC_MSG_ERROR([default Infinicon headers not present]);; - 3) AC_MSG_ERROR([bad --with-iib path]);; - *) AC_MSG_ERROR([internal error]);; - esac -else - IIBCPPFLAGS="-I$IIBPATH" - if test $IIBPATH != "/usr/include"; then - # we need /usr/include come what may - IIBCPPFLAGS="$IIBCPPFLAGS -I/usr/include" - fi - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $IIBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <linux/iba/ibt.h> - ],[ - IBT_INTERFACE_UNION interfaces; - FSTATUS rc; - - rc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2, - &interfaces); - - return rc == FSUCCESS ? 0 : 1; - ],[ - AC_MSG_RESULT([yes]) - IIBLND="iiblnd" - ],[ - AC_MSG_RESULT([no]) - case $ENABLEIIB in - 1) ;; - 2) AC_MSG_ERROR([can't compile with default Infinicon headers]);; - 3) AC_MSG_ERROR([can't compile with Infinicon headers under $IIBPATH]);; - *) AC_MSG_ERROR([internal error]);; - esac - IIBLND="" - IIBCPPFLAGS="" - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(IIBCPPFLAGS) -AC_SUBST(IIBLND) -]) - -# -# LN_CONFIG_VIB -# -# check for Voltaire infiniband support -# -AC_DEFUN([LN_CONFIG_VIB], -[AC_MSG_CHECKING([whether to enable Voltaire IB support]) -VIBPATH="" -AC_ARG_WITH([vib], - AC_HELP_STRING([--with-vib=path], - [build viblnd against path]), - [ - case $with_vib in - no) AC_MSG_RESULT([no]);; - *) VIBPATH="${with_vib}/src/nvigor/ib-code" - if test -d "$with_vib" -a -d "$VIBPATH"; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - AC_MSG_ERROR([No directory $VIBPATH]) - fi;; - esac - ],[ - AC_MSG_RESULT([no]) - ]) -if test -z "$VIBPATH"; then - VIBLND="" -else - VIBCPPFLAGS="-I${VIBPATH}/include -I${VIBPATH}/cm" - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $VIBCPPFLAGS" - LB_LINUX_TRY_COMPILE([ - #include <linux/list.h> - #include <asm/byteorder.h> - #ifdef __BIG_ENDIAN - # define CPU_BE 1 - # define CPU_LE 0 - #endif - 
#ifdef __LITTLE_ENDIAN - # define CPU_BE 0 - # define CPU_LE 1 - #endif - #include <vverbs.h> - #include <ib-cm.h> - #include <ibat.h> - ],[ - vv_hca_h_t kib_hca; - vv_return_t vvrc; - cm_cep_handle_t cep; - ibat_arp_data_t arp_data; - ibat_stat_t ibatrc; - - vvrc = vv_hca_open("ANY_HCA", NULL, &kib_hca); - cep = cm_create_cep(cm_cep_transp_rc); - ibatrc = ibat_get_ib_data((uint32_t)0, (uint32_t)0, - ibat_paths_primary, &arp_data, - (ibat_get_ib_data_reply_fn_t)NULL, - NULL, 0); - return 0; - ],[ - VIBLND="viblnd" - ],[ - AC_MSG_ERROR([can't compile viblnd with given path]) - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -if test -n "$VIBLND"; then - EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="$EXTRA_KCFLAGS $VIBCPPFLAGS" - AC_MSG_CHECKING([if Voltaire still uses void * sg addresses]) - LB_LINUX_TRY_COMPILE([ - #include <linux/list.h> - #include <asm/byteorder.h> - #ifdef __BIG_ENDIAN - # define CPU_BE 1 - # define CPU_LE 0 - #endif - #ifdef __LITTLE_ENDIAN - # define CPU_BE 0 - # define CPU_LE 1 - #endif - #include <vverbs.h> - #include <ib-cm.h> - #include <ibat.h> - ],[ - vv_scatgat_t sg; - - return &sg.v_address[3] == NULL; - ],[ - AC_MSG_RESULT([yes]) - VIBCPPFLAGS="$VIBCPPFLAGS -DIBNAL_VOIDSTAR_SGADDR=1" - ],[ - AC_MSG_RESULT([no]) - ]) - EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -fi -AC_SUBST(VIBCPPFLAGS) -AC_SUBST(VIBLND) -]) - -# -# LN_CONFIG_RALND -# -# check whether to use the RapidArray lnd -# -AC_DEFUN([LN_CONFIG_RALND], -[#### Rapid Array -AC_MSG_CHECKING([if RapidArray kernel headers are present]) -# placeholder -RACPPFLAGS="-I${LINUX}/drivers/xd1/include" -EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS" -EXTRA_KCFLAGS="$EXTRA_KCFLAGS $RACPPFLAGS" -LB_LINUX_TRY_COMPILE([ - #include <linux/types.h> - #include <rapl.h> -],[ - RAP_RETURN rc; - RAP_PVOID dev_handle; - - rc = RapkGetDeviceByIndex(0, NULL, &dev_handle); - - return rc == RAP_SUCCESS ? 
0 : 1; -],[ - AC_MSG_RESULT([yes]) - RALND="ralnd" -],[ - AC_MSG_RESULT([no]) - RALND="" - RACPPFLAGS="" -]) -EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save" -AC_SUBST(RACPPFLAGS) -AC_SUBST(RALND) -]) - -# -# LN_STRUCT_PAGE_LIST -# -# 2.6.4 no longer has page->list -# -AC_DEFUN([LN_STRUCT_PAGE_LIST], -[AC_MSG_CHECKING([if struct page has a list field]) -LB_LINUX_TRY_COMPILE([ - #include <linux/mm.h> -],[ - struct page page; - &page.list; -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LN_STRUCT_SIGHAND -# -# red hat 2.4 adds sighand to struct task_struct -# -AC_DEFUN([LN_STRUCT_SIGHAND], -[AC_MSG_CHECKING([if task_struct has a sighand field]) -LB_LINUX_TRY_COMPILE([ - #include <linux/sched.h> -],[ - struct task_struct p; - p.sighand = NULL; -],[ - AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches]) - AC_MSG_RESULT([yes]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LN_FUNC_CPU_ONLINE -# -# cpu_online is different in rh 2.4, vanilla 2.4, and 2.6 -# -AC_DEFUN([LN_FUNC_CPU_ONLINE], -[AC_MSG_CHECKING([if kernel defines cpu_online()]) -LB_LINUX_TRY_COMPILE([ - #include <linux/sched.h> -],[ - cpu_online(0); -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LN_TYPE_GFP_T -# -# check if gfp_t is typedef-ed -# -AC_DEFUN([LN_TYPE_GFP_T], -[AC_MSG_CHECKING([if kernel defines gfp_t]) -LB_LINUX_TRY_COMPILE([ - #include <linux/gfp.h> -],[ - return sizeof(gfp_t); -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_GFP_T, 1, [gfp_t found]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# LN_TYPE_CPUMASK_T -# -# same goes for cpumask_t -# -AC_DEFUN([LN_TYPE_CPUMASK_T], -[AC_MSG_CHECKING([if kernel defines cpumask_t]) -LB_LINUX_TRY_COMPILE([ - #include <linux/sched.h> -],[ - return sizeof (cpumask_t); -],[ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found]) -],[ - AC_MSG_RESULT([no]) -]) -]) - -# -# 
LN_FUNC_SHOW_TASK -# -# we export show_task(), but not all kernels have it (yet) -# -AC_DEFUN([LN_FUNC_SHOW_TASK], -[AC_MSG_CHECKING([if kernel exports show_task]) -have_show_task=0 -for file in ksyms sched ; do - if grep -q "EXPORT_SYMBOL(show_task)" \ - "$LINUX/kernel/$file.c" 2>/dev/null ; then - have_show_task=1 - break - fi -done -if test x$have_show_task = x1 ; then - AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported]) - AC_MSG_RESULT([yes]) -else - AC_MSG_RESULT([no]) -fi -]) - -# LN_TASKLIST_LOCK -# 2.6.18 remove tasklist_lock export -AC_DEFUN([LN_TASKLIST_LOCK], -[AC_MSG_CHECKING([kernel export tasklist_lock]) - if grep -q "EXPORT_SYMBOL(tasklist_lock)" \ - "$LINUX/kernel/fork.c" 2>/dev/null ; then - AC_DEFINE(HAVE_TASKLIST_LOCK, 1, - [tasklist_lock exported]) - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -]) - -# 2.6.19 API changes -# kmem_cache_destroy(cachep) return void instead of -# int -AC_DEFUN([LN_KMEM_CACHE_DESTROY_INT], -[AC_MSG_CHECKING([kmem_cache_destroy(cachep) return int]) -LB_LINUX_TRY_COMPILE([ - #include <linux/slab.h> -],[ - int i = kmem_cache_destroy(NULL); -],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT, 1, - [kmem_cache_destroy(cachep) return int]) -],[ - AC_MSG_RESULT(NO) -]) -]) - -#2.6.19 API change -#panic_notifier_list use atomic_notifier operations -# -AC_DEFUN([LN_ATOMIC_PANIC_NOTIFIER], -[AC_MSG_CHECKING([panic_notifier_list is atomic]) -LB_LINUX_TRY_COMPILE([ - #include <linux/notifier.h> - #include <linux/kernel.h> -],[ - struct atomic_notifier_head panic_notifier_list; -],[ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_ATOMIC_PANIC_NOTIFIER, 1, - [panic_notifier_list is atomic_notifier_head]) -],[ - AC_MSG_RESULT(NO) -]) -]) - -# -# LN_PROG_LINUX -# -# LNet linux kernel checks -# -AC_DEFUN([LN_PROG_LINUX], -[ -LN_FUNC_CPU_ONLINE -LN_TYPE_GFP_T -LN_TYPE_CPUMASK_T -LN_CONFIG_AFFINITY -LN_CONFIG_BACKOFF -LN_CONFIG_PANIC_DUMPLOG -LN_CONFIG_QUADRICS -LN_CONFIG_GM -LN_CONFIG_OPENIB -LN_CONFIG_CIB 
-LN_CONFIG_VIB -LN_CONFIG_IIB -LN_CONFIG_O2IB -LN_CONFIG_RALND -LN_CONFIG_PTLLND -LN_CONFIG_MX - -LN_STRUCT_PAGE_LIST -LN_STRUCT_SIGHAND -LN_FUNC_SHOW_TASK -# 2.6.18 -LN_TASKLIST_LOCK -# 2.6.19 -LN_KMEM_CACHE_DESTROY_INT -LN_ATOMIC_PANIC_NOTIFIER -]) - -# -# LN_PROG_DARWIN -# -# Darwin checks -# -AC_DEFUN([LN_PROG_DARWIN], -[LB_DARWIN_CHECK_FUNCS([get_preemption_level]) -]) - -# -# LN_PATH_DEFAULTS -# -# default paths for installed files -# -AC_DEFUN([LN_PATH_DEFAULTS], -[ -]) - -# -# LN_CONFIGURE -# -# other configure checks -# -AC_DEFUN([LN_CONFIGURE], -[# lnet/utils/portals.c -AC_CHECK_HEADERS([netdb.h netinet/tcp.h asm/types.h endian.h sys/ioctl.h]) -AC_CHECK_FUNCS([gethostbyname socket connect]) - -# lnet/utils/debug.c -AC_CHECK_HEADERS([linux/version.h]) - -AC_CHECK_TYPE([spinlock_t], - [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])], - [], - [#include <linux/spinlock.h>]) - -# lnet/utils/wirecheck.c -AC_CHECK_FUNCS([strnlen]) - -# -------- Check for required packages -------------- - -# -# LC_CONFIG_READLINE -# -# Build with readline -# -AC_MSG_CHECKING([whether to enable readline support]) -AC_ARG_ENABLE(readline, - AC_HELP_STRING([--disable-readline], - [disable readline support]), - [],[enable_readline='yes']) -AC_MSG_RESULT([$enable_readline]) - -# -------- check for readline if enabled ---- -if test x$enable_readline = xyes ; then - LIBS_save="$LIBS" - LIBS="-lncurses $LIBS" - AC_CHECK_LIB([readline],[readline],[ - LIBREADLINE="-lreadline -lncurses" - AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available]) - ],[ - LIBREADLINE="" - ]) - LIBS="$LIBS_save" -else - LIBREADLINE="" -fi -AC_SUBST(LIBREADLINE) - -AC_MSG_CHECKING([if efence debugging support is requested]) -AC_ARG_ENABLE(efence, - AC_HELP_STRING([--enable-efence], - [use efence library]), - [],[enable_efence='no']) -AC_MSG_RESULT([$enable_efence]) -if test "$enable_efence" = "yes" ; then - LIBEFENCE="-lefence" - AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested]) 
-else - LIBEFENCE="" -fi -AC_SUBST(LIBEFENCE) - -# -------- enable acceptor libwrap (TCP wrappers) support? ------- -AC_MSG_CHECKING([if libwrap support is requested]) -AC_ARG_ENABLE([libwrap], - AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]), - [case "${enableval}" in - yes) enable_libwrap=yes ;; - no) enable_libwrap=no ;; - *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;; - esac],[enable_libwrap=no]) -AC_MSG_RESULT([$enable_libwrap]) -if test x$enable_libwrap = xyes ; then - LIBWRAP="-lwrap" - AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested]) -else - LIBWRAP="" -fi -AC_SUBST(LIBWRAP) - -# -------- check for -lpthread support ---- -AC_MSG_CHECKING([whether to use libpthread for lnet library]) -AC_ARG_ENABLE([libpthread], - AC_HELP_STRING([--disable-libpthread], - [disable libpthread]), - [],[enable_libpthread=yes]) -if test "$enable_libpthread" = "yes" ; then - AC_CHECK_LIB([pthread], [pthread_create], - [ENABLE_LIBPTHREAD="yes"], - [ENABLE_LIBPTHREAD="no"]) - if test "$ENABLE_LIBPTHREAD" = "yes" ; then - AC_MSG_RESULT([$ENABLE_LIBPTHREAD]) - PTHREAD_LIBS="-lpthread" - AC_DEFINE([HAVE_LIBPTHREAD], 1, [use libpthread]) - else - PTHREAD_LIBS="" - AC_MSG_RESULT([no libpthread is found]) - fi - AC_SUBST(PTHREAD_LIBS) -else - AC_MSG_RESULT([no (disabled explicitly)]) - ENABLE_LIBPTHREAD="no" -fi -AC_SUBST(ENABLE_LIBPTHREAD) - -# ---------------------------------------- -# some tests for catamount-like systems -# ---------------------------------------- -AC_ARG_ENABLE([sysio_init], - AC_HELP_STRING([--disable-sysio-init], - [call sysio init functions when initializing liblustre]), - [],[enable_sysio_init=yes]) -AC_MSG_CHECKING([whether to initialize libsysio]) -AC_MSG_RESULT([$enable_sysio_init]) -if test x$enable_sysio_init != xno ; then - AC_DEFINE([INIT_SYSIO], 1, [call sysio init functions]) -fi - -AC_ARG_ENABLE([urandom], - AC_HELP_STRING([--disable-urandom], - [disable use of /dev/urandom for liblustre]), - [],[enable_urandom=yes]) 
-AC_MSG_CHECKING([whether to use /dev/urandom for liblustre]) -AC_MSG_RESULT([$enable_urandom]) -if test x$enable_urandom != xno ; then - AC_DEFINE([LIBLUSTRE_USE_URANDOM], 1, [use /dev/urandom for random data]) -fi - -# -------- check for -lcap support ---- -if test x$enable_liblustre = xyes ; then - AC_CHECK_LIB([cap], [cap_get_proc], - [ - CAP_LIBS="-lcap" - AC_DEFINE([HAVE_LIBCAP], 1, [use libcap]) - ], - [ - CAP_LIBS="" - ]) - AC_SUBST(CAP_LIBS) - -fi - -LN_CONFIG_MAX_PAYLOAD -LN_CONFIG_UPTLLND -LN_CONFIG_USOCKLND -]) - -# -# LN_CONDITIONALS -# -# AM_CONDITOINAL defines for lnet -# -AC_DEFUN([LN_CONDITIONALS], -[AM_CONDITIONAL(BUILD_QSWLND, test x$QSWLND = "xqswlnd") -AM_CONDITIONAL(BUILD_GMLND, test x$GMLND = "xgmlnd") -AM_CONDITIONAL(BUILD_MXLND, test x$MXLND = "xmxlnd") -AM_CONDITIONAL(BUILD_O2IBLND, test x$O2IBLND = "xo2iblnd") -AM_CONDITIONAL(BUILD_OPENIBLND, test x$OPENIBLND = "xopeniblnd") -AM_CONDITIONAL(BUILD_CIBLND, test x$CIBLND = "xciblnd") -AM_CONDITIONAL(BUILD_IIBLND, test x$IIBLND = "xiiblnd") -AM_CONDITIONAL(BUILD_VIBLND, test x$VIBLND = "xviblnd") -AM_CONDITIONAL(BUILD_RALND, test x$RALND = "xralnd") -AM_CONDITIONAL(BUILD_PTLLND, test x$PTLLND = "xptllnd") -AM_CONDITIONAL(BUILD_UPTLLND, test x$UPTLLND = "xptllnd") -AM_CONDITIONAL(BUILD_USOCKLND, test x$USOCKLND = "xusocklnd") -]) - -# -# LN_CONFIG_FILES -# -# files that should be generated with AC_OUTPUT -# -AC_DEFUN([LN_CONFIG_FILES], -[AC_CONFIG_FILES([ -lnet/Kernelenv -lnet/Makefile -lnet/autoMakefile -lnet/autoconf/Makefile -lnet/doc/Makefile -lnet/include/Makefile -lnet/include/libcfs/Makefile -lnet/include/libcfs/linux/Makefile -lnet/include/lnet/Makefile -lnet/include/lnet/linux/Makefile -lnet/klnds/Makefile -lnet/klnds/autoMakefile -lnet/klnds/gmlnd/Makefile -lnet/klnds/mxlnd/autoMakefile -lnet/klnds/mxlnd/Makefile -lnet/klnds/gmlnd/autoMakefile -lnet/klnds/openiblnd/Makefile -lnet/klnds/openiblnd/autoMakefile -lnet/klnds/o2iblnd/Makefile -lnet/klnds/o2iblnd/autoMakefile 
-lnet/klnds/ciblnd/Makefile -lnet/klnds/ciblnd/autoMakefile -lnet/klnds/iiblnd/Makefile -lnet/klnds/iiblnd/autoMakefile -lnet/klnds/viblnd/Makefile -lnet/klnds/viblnd/autoMakefile -lnet/klnds/qswlnd/Makefile -lnet/klnds/qswlnd/autoMakefile -lnet/klnds/ralnd/Makefile -lnet/klnds/ralnd/autoMakefile -lnet/klnds/socklnd/Makefile -lnet/klnds/socklnd/autoMakefile -lnet/klnds/ptllnd/Makefile -lnet/klnds/ptllnd/autoMakefile -lnet/libcfs/Makefile -lnet/libcfs/autoMakefile -lnet/libcfs/linux/Makefile -lnet/lnet/Makefile -lnet/lnet/autoMakefile -lnet/tests/Makefile -lnet/tests/autoMakefile -lnet/ulnds/Makefile -lnet/ulnds/autoMakefile -lnet/ulnds/socklnd/Makefile -lnet/ulnds/ptllnd/Makefile -lnet/utils/Makefile -]) -case $lb_target_os in - darwin) - AC_CONFIG_FILES([ -lnet/include/libcfs/darwin/Makefile -lnet/include/lnet/darwin/Makefile -lnet/libcfs/darwin/Makefile -]) - ;; -esac -]) diff --git a/lnet/doc/.cvsignore b/lnet/doc/.cvsignore deleted file mode 100644 index 827dca41301e5e078edf5fcef01e7cbc0b081b10..0000000000000000000000000000000000000000 --- a/lnet/doc/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -Makefile -Makefile.in -*.eps -*.pdf diff --git a/lnet/doc/Data-structures b/lnet/doc/Data-structures deleted file mode 100644 index b5532b1b8d1b96c06f385d0bf218df6a38165726..0000000000000000000000000000000000000000 --- a/lnet/doc/Data-structures +++ /dev/null @@ -1,65 +0,0 @@ -In this document I will try to draw the data structures and how they -interrelate in the Portals 3 reference implementation. It is probably -best shown with a drawing, so there may be an additional xfig or -Postscript figure. - - -MEMORY POOLS: ------------- - -First, a digression on memory allocation in the library. As mentioned -in the NAL Writer's Guide, the library does not link against any -standard C libraries and as such is unable to dynamically allocate -memory on its own. 
It requires that the NAL implement a method -for allocation that is appropriate for the protection domain in -which the library lives. This is only called when a network -interface is initialized to allocate the Portals object pools. - -These pools are preallocated blocks of objects that the library -can rapidly make active and manage with a minimum of overhead. -It also cuts down on overhead for setting up structures -since the NAL->malloc() callback does not need to be called -for each object. - -The objects are maintained on a per-object type singly linked free -list and contain a pointer to the next free object. This pointer -is NULL if the object is not on the free list and is non-zero -if it is on the list. The special sentinel value of 0xDEADBEEF -is used to mark the end of the free list since NULL could -indicate that the last object in the list is not free. - -When one of the lib_*_alloc() functions is called, the library -returns the head of the free list and advances the head pointer -to the next item on the list. The special case of 0xDEADBEEF is -checked and a NULL pointer is returned if there are no more -objects of this type available. The lib_*_free() functions -are even simpler -- check to ensure that the object is not already -free, set its next pointer to the current head and then set -the head to be this newly freed object. - -Since C does not have templates, I did the next best thing and wrote -the memory pool allocation code as a macro that expands based on the -type of the argument. The mk_alloc(T) macro expands to -write the _lib_T_alloc() and lib_T_free() functions. -It requires that the object have a pointer of the type T named -"next_free". There are also functions that map _lib_T_alloc() -to lib_T_alloc() so that the library can add some extra -functionality to the T constructor. - - - -LINKED LISTS: ------------- - -Many of the active Portals objects are stored in doubly linked lists -when they are active. 
These are always implemented with the pointer -to the next object and a pointer to the next pointer of the -previous object. This avoids the "dummy head" object or -special cases for inserting at the beginning or end of the list. -The pointer manipulations are a little hairy at times, but -I hope that they are understandable. - -The actual linked list code is implemented as macros in <lib-p30.h>, -although the object has to know about - - diff --git a/lnet/doc/Makefile.am b/lnet/doc/Makefile.am deleted file mode 100644 index b7f6252f34a539174f2cfe60a36336442e84b1b3..0000000000000000000000000000000000000000 --- a/lnet/doc/Makefile.am +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -LYX2PDF = lyx --export pdf -LYX2TXT = lyx --export text -LYX2HTML = lyx --export html -SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps - -if DOC - DOCS = portals3.pdf -else - DOCS = -endif - -IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps -LYXFILES= portals3.lyx - -MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED) -GENERATED = -EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES) - -all: $(DOCS) - -# update date and version in document -date := $(shell date +%x) -tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/') -addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g' - -# Regenerate when the $(VERSION) or $Name: $ changes. 
-.INTERMEDIATE: $(GENERATED) -$(GENERATED) : %.lyx: %.lin Makefile - $(addversion) $< > $@ - -.lyx.pdf: - @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n" - -.lyx.txt: - @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n" -.lyx.html: - @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n" -.fig.eps: - -fig2dev -L eps $< > $@ - -portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx - -syncweb: portals3.pdf -# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf -# ( cd /usr/src/www ; make lustre ; make synclustre ) - diff --git a/lnet/doc/Message-life-cycle b/lnet/doc/Message-life-cycle deleted file mode 100644 index e8cc7e21b2276de5e9caed367cd9d83ce6a85955..0000000000000000000000000000000000000000 --- a/lnet/doc/Message-life-cycle +++ /dev/null @@ -1,118 +0,0 @@ -This documents the life cycle of message as it arrives and is handled by -a basic async, packetized NAL. There are four types of messages that have -slightly different life cycles, so they are addressed independently. - - -Put request ------------ - -1. NAL notices that there is a incoming message header on the network -and reads an ptl_hdr_t in from the wire. - -2. It may store additional NAL specific data that provides context -for this event in a void* that it will interpret in some fashion -later. - -3. The NAL calls lib_parse() with a pointer to the header and its -private data structure. - -4. The library decodes the header and may build a message state -object that describes the event to be written and the ACK to be -sent, if any. It then calls nal->recv() with the private data -that the NAL passed in, a pointer to the message state object -and a translated user address. - - The NAL will have been given a chance to pretranslate - all user addresses when the buffers are created. This - process is described in the NAL-HOWTO. - -5. 
The NAL should restore whatever context it required from the -private data pointer, begin receiving the bytes and possibly store -some extra state of its own. It should return at this point. - - - -Get request ------------ - -1. As with a Put, the NAL notices the incoming message header and -passes it to lib_parse(). - -2. The library decodes the header and calls nal->recv() with a -zero byte length, offset and destination to instruct it to clean -up the wire after reading the header. The private data will -be passed in as well, allowing the NAL to retrieve any state -or context that it requires. - -3. The library may build a message state object to possibly -write an event log or invalidate a memory region. - -4. The library will build a ptl_msg_t header that specifies the -Portals protocol information for delivery at the remote end. - -5. The library calls nal->send() with the pre-built header, -the optional message state object, the four part address -component, a translated user pointer + offset, and some -other things. - -6. The NAL is to put the header on the wire or copy it at -this point (since it is off the stack). It should store some -amount of state about its current position in the message and -the destination address. - -7. And then return to the library. - - -Reply request -------------- - -1. Starting at "The library decodes the header..." - -2. The library decodes the header and calls nal->recv() -to bring in the rest of the message. Flow continues in -exactly the same fashion as with all other receives. - - -Ack request ------------ - -1. The library decodes the header, builds the appropriate data -structures for the event in a message state object and calls nal->recv() -with a zero byte length, etc. - - -Packet arrival --------------- - -1. 
The NAL should notice the arrival of a packet, retrieve whatever -state it needs from the message ID or other NAL specific header data -and place the data bytes directly into the user address that was -given to nal->recv(). - - How this happens is outside the scope of the Portals library - and solely determined by the NAL... - -2. If this is the last packet in a message, the NAL should retrieve -the lib_msg_t *cookie that it was given in the call to nal->recv() -and pass it to lib_finalize(). lib_finalize() may call nal->send() -to send an ACK, nal->write() to record an entry in the event log, -nal->invalidate() to unregister a region of memory or do nothing at all. - -3. It should then clean up any remaining NAL specific state about -the message and go back into the main loop. - - -Outgoing packets ----------------- - -1. When the NAL has pending output, it should put the packets on -the wire wrapped with whatever implementation specified wrappers. - -2. Once it has output all the packets of a message it should -call lib_finalize() with the message state object that was -handed to nal->send(). This will allow the library to clean -up its state regarding the message and write any pending event -entries. - - - diff --git a/lnet/doc/NAL-HOWTO b/lnet/doc/NAL-HOWTO deleted file mode 100644 index ea38aed09e195452b6696ed73f133e247073a1af..0000000000000000000000000000000000000000 --- a/lnet/doc/NAL-HOWTO +++ /dev/null @@ -1,293 +0,0 @@ -This document is a first attempt at describing how to write a NAL -for the Portals 3 library. It also defines the library architecture -and the abstraction of protection domains. 
- - -First, an overview of the architecture: - - Application - -----|----+-------- - | - API === NAL (User space) - | ----------+---|----- - | - LIB === NAL (Library space) - | ----------+---|----- - - Physical wire (NIC space) - - -Application - API -API-side NAL ------------- -LIB-side NAL - LIB -LIB-side NAL - wire - -Communication is through the indicated paths via well defined -interfaces. The API and LIB portions are written to be portable -across platforms and do not depend on the network interface. - -Communication between the application and the API code is -defined in the Portals 3 API specification. This is the -user-visible portion of the interface and should be the most -stable. - - - -API-side NAL: ------------- - -The user space NAL needs to implement only a few functions -that are stored in a nal_t data structure and called by the -API-side library: - - int forward( nal_t *nal, - int index, - void *args, - size_t arg_len, - void *ret, - size_t ret_len - ); - -Most of the data structures in the portals library are held in -the LIB section of the code, so it is necessary to forward API -calls across the protection domain to the library. This is -handled by the NAL's forward method. Once the argument and return -blocks are on the remote side the NAL should call lib_dispatch() -to invoke the appropriate API function. - - int validate( nal_t *nal, - void *base, - size_t extent, - void **trans_base, - void **trans_data - ); - -The validate method provides a means for the NAL to prevalidate -and possibly pretranslate user addresses into a form suitable -for fast use by the network card or kernel module. The trans_base -pointer will be used by the library every time it needs to -refer to the block of memory. The trans_data result is a -cookie that will be handed to the NAL along with the trans_base. - -The library never performs calculations on the trans_base value; -it only computes offsets that are then handed to the NAL. 
- - - int shutdown( nal_t *nal, int interface ); - -Brings down the network interface. The remote NAL side should -call lib_fini() to bring down the library side of the network. - - void yield( nal_t *nal ); - -This allows the user application to gracefully give up the processor -while busy waiting. Performance critical applications may not -want to take the time to call this function, so it should be an -option to the PtlEQWait call. Right now it is not implemented as such. - -Lastly, the NAL must implement a function named PTL_IFACE_*, where -* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR. -This initialization function is to set up communication with the -library-side NAL, which should call lib_init() to bring up the -network interface. - - - -LIB-side NAL: ------------- - -On the library-side, the NAL has much more responsibility. It -is responsible for calling lib_dispatch() on behalf of the user, -it is also responsible for bringing packets off the wire and -pushing bits out. As on the user side, the methods are stored -in a nal_cb_t structure that is defined on a per network -interface basis. - -The calls to lib_dispatch() need to be examined. The prototype: - - void lib_dispatch( - nal_cb_t *nal, - void *private, - int index, - void *arg_block, - void *ret_block - ); - -has two complications. The private field is a NAL-specific -value that will be passed to any callbacks produced as a result -of this API call. Kernel module implementations may use this -for task structures, or perhaps network card data. It is ignored -by the library. - -Secondly, the arg_block and ret_block must be in the same protection -domain as the library. The NAL's two halves must communicate the -sizes and perform the copies. After the call, the buffer pointed -to by ret_block will be filled in and should be copied back to -the user space. How this is to be done is NAL specific. 
- - int lib_parse( - nal_cb_t *nal, - ptl_hdr_t *hdr, - void *private - ); - -This is the only other entry point into the library from the NAL. -When the NAL detects an incoming message on the wire it should read -sizeof(ptl_hdr_t) bytes and pass a pointer to the header to -lib_parse(). It may set private to be anything that it needs to -tie the incoming message to callbacks that are made as a result -of this event. - -The method calls are: - - int (*send)( - nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int nid, - int pid, - int gid, - int rid, - user_ptr trans_base, - user_ptr trans_data, - size_t offset, - size_t len - ); - -This is a tricky function -- it must support async output -of messages as well as properly synchronized event log writing. -The private field is the same that was passed into lib_dispatch() -or lib_parse() and may be used to tie this call to the event -that initiated the entry to the library. - -The cookie is a pointer to a library private value that must -be passed to lib_finalize() once the message has been completely -sent. It should not be examined by the NAL for any meaning. - -The four ID fields are passed in, although some implementations -may not use all of them. - -The single base pointer has been replaced with the translated -address that the API NAL generated in the api_nal->validate() -call. The trans_data is unchanged and the offset is in bytes. - - - int (*recv)( - nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - user_ptr trans_base, - user_ptr trans_data, - size_t offset, - size_t mlen, - size_t rlen - ); - -This callback will only be called in response to lib_parse(). -The cookie, trans_base and trans_data are as discussed in send(). -The NAL should read mlen bytes from the wire, deposit them into -trans_base + offset and then discard (rlen - mlen) bytes. -Once the entire message has been received the NAL should call -lib_finalize() with the lib_msg_t *cookie. 
- -The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0 -are used to indicate that the NAL should clean up the wire. This could -be implemented as a blocking call, although having it return as quickly -as possible is desirable. - - int (*write)( - nal_cb_t *nal, - void *private, - user_ptr trans_addr, - user_ptr trans_data, - size_t offset, - - void *src_addr, - size_t len - ); - -This is essentially a cross-protection domain memcpy(). The user address -has been pretranslated by the api_nal->validate() call. - - void *(*malloc)( - nal_cb_t *nal, - size_t len - ); - - void (*free)( - nal_cb_t *nal, - void *buf - ); - -Since the NAL may be in a non-standard hosted environment it can -not call malloc(). This allows the library side NAL to implement -the system specific malloc(). In the current reference implementation -the library only calls nal->malloc() when the network interface is -initialized and then calls free when it is brought down. The library -maintains its own pool of objects for allocation so only one call to -malloc is made per object type. - - void (*invalidate)( - nal_cb_t *nal, - user_ptr trans_base, - user_ptr trans_data, - size_t extent - ); - -User addresses are validated/translated at the user-level API NAL -method, which is likely to push them to this level. Meanwhile, -the library NAL will be notified when the library no longer -needs the buffer. Overlapped buffers are not detected by the -library, so the NAL should ref count each page involved. - -Unfortunately we have a few bugs when the invalidate method is -called. It is still in progress... - - void (*printf)( - nal_cb_t *nal, - const char *fmt, - ... - ); - -As with malloc(), the library does not have any way to do printf -or printk. It is not necessary for the NAL to implement this -call, although it will make debugging difficult. 
- - void (*cli)( - nal_cb_t *nal, - unsigned long *flags - ); - - void (*sti)( - nal_cb_t *nal, - unsigned long *flags - ); - -These are used by the library to mark critical sections. - - int (*gidrid2nidpid)( - nal_cb_t *nal, - ptl_id_t gid, - ptl_id_t rid, - ptl_id_t *nid, - ptl_id_t *pid - ); - - - int (*nidpid2gidrid)( - nal_cb_t *nal, - ptl_id_t nid, - ptl_id_t pid, - ptl_id_t *gid, - ptl_id_t *rid - ); - -Rolf added these. I haven't looked at how they have to work yet. diff --git a/lnet/doc/file.fig b/lnet/doc/file.fig deleted file mode 100644 index 914c2941d921c4e1106d2a88a149a95e99d8af14..0000000000000000000000000000000000000000 --- a/lnet/doc/file.fig +++ /dev/null @@ -1,111 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 1200 750 1650 1050 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 1050 1650 750 1200 750 1200 1050 1650 1050 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001 --6 -6 1200 2325 1650 2625 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001 --6 -6 1200 1800 1650 2100 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001 --6 -6 1200 1275 1650 1575 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001 --6 -6 450 750 900 1200 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 825 450 1050 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1050 900 825 --6 -6 450 2325 900 2775 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 2400 450 2625 -2 1 0 1 0 7 100 
0 20 0.000 0 0 -1 0 0 2 - 900 2625 900 2400 --6 -6 450 1800 900 2250 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 1875 450 2100 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2100 900 1875 --6 -6 450 1275 900 1725 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 1350 450 1575 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1575 900 1350 --6 -6 2250 750 3450 2625 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1200 3150 1200 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1500 3150 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1800 3150 1800 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 2100 3150 2100 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2550 975 3150 975 3150 2625 2550 2625 2550 975 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 2400 3150 2400 -4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2400 2550 1350 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 1875 2550 1050 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 1425 2550 1950 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 900 2550 1650 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 900 1200 900 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1425 1200 1425 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1950 1200 1950 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2475 1200 2475 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2025 2550 2250 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 
60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2550 2550 2475 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1875 2850 1875 600 225 600 225 2850 1875 2850 -4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001 diff --git a/lnet/doc/flow_new.fig b/lnet/doc/flow_new.fig deleted file mode 100644 index d828dea8b7b318fb89d6fb9ff3095b01509dd568..0000000000000000000000000000000000000000 --- a/lnet/doc/flow_new.fig +++ /dev/null @@ -1,213 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 525 2175 1575 2925 -6 675 2287 1425 2812 -4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001 -4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001 -4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 1575 2550 1050 2175 525 2550 1050 2925 1575 2550 --6 -6 3450 1275 4350 1725 -6 3600 1312 4200 1687 -4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001 -4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275 --6 -6 4650 1275 5550 1725 -6 4725 1312 5475 1687 -4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001 -4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275 --6 -6 1350 525 2250 975 -6 1350 562 2250 937 -4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001 -4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1350 525 2250 525 2250 975 1350 975 1350 525 --6 -6 525 1125 1575 1875 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 1575 1500 1050 1125 525 1500 1050 1875 1575 1500 -4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001 --6 -6 2340 1237 2940 1687 -6 2340 1237 2940 1687 -4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001 -4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001 -4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 
Entries?\001 --6 --6 -6 525 3225 1575 3975 -6 675 3375 1425 3750 -4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001 -4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001 --6 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 525 3600 1050 3225 1575 3600 1050 3975 525 3600 --6 -6 3300 3375 4350 3825 -6 3300 3412 4350 3787 -4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001 -4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375 --6 -6 1950 3225 3000 3975 -6 2250 3450 2700 3750 -4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001 -4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600 --6 -6 3150 4500 4200 4950 -6 3150 4537 4200 4912 -4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001 -4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500 --6 -6 600 4500 1500 4950 -6 675 4537 1425 4912 -4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001 -4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 600 4500 1500 4500 1500 4950 600 4950 600 4500 --6 -6 4650 4350 5700 5100 -6 4950 4537 5400 4912 -6 4950 4537 5400 4912 -4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001 -4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001 --6 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725 --6 -6 6000 4500 6900 4950 -6 6225 4575 6675 4875 -4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001 -4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500 --6 -6 1800 4350 2850 5100 -6 2100 4575 2550 4875 -4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001 -4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 
thresh?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 1875 1050 2175 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1575 1500 2100 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 450 1050 1125 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1350 750 1050 750 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 2925 1050 3225 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3150 1500 3450 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 4350 1500 4650 1500 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1575 3600 1950 3600 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 3975 1050 4500 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3000 3600 3300 3600 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 4725 1800 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 5700 4725 6000 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2850 4725 3150 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 4200 4725 4650 4725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 6900 4725 7950 4725 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 - 0 0 1.00 60.00 120.00 - 2250 750 2475 750 2625 750 2625 900 2625 1125 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 - 0 0 1.00 60.00 120.00 - 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 2475 3225 
2475 2400 2475 2250 2325 2250 1800 2250 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 - 0 0 1.00 60.00 120.00 - 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125 - 4425 4275 4425 4725 - 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 - 0 0 1.00 60.00 120.00 - 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125 - 7275 4275 7275 4725 - 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 -4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001 -4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001 -4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001 -4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001 -4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001 -4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001 diff --git a/lnet/doc/get.fig b/lnet/doc/get.fig deleted file mode 100644 index 28db949a47fb2e7fa45a1f74087c6e58c25ed9a6..0000000000000000000000000000000000000000 --- a/lnet/doc/get.fig +++ /dev/null @@ -1,33 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 2775 900 3525 1200 -4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001 -4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001 --6 -6 1350 1725 2175 2025 -4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001 -4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 
Data\001 --6 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 900 525 2700 750 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 825 2700 1275 -2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 1350 900 1950 -2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5 - 2400 300 3600 300 3600 2250 2400 2250 2400 300 -2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5 - 0 300 1200 300 1200 2250 0 2250 0 300 -4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001 -4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 -4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 diff --git a/lnet/doc/ieee.bst b/lnet/doc/ieee.bst deleted file mode 100644 index 4df7c50b7b30c4728257449ff99eef57a2ebb184..0000000000000000000000000000000000000000 --- a/lnet/doc/ieee.bst +++ /dev/null @@ -1,1112 +0,0 @@ -% --------------------------------------------------------------- -% -% by Paolo.Ienne@di.epfl.ch -% -% --------------------------------------------------------------- -% -% no guarantee is given that the format corresponds perfectly to -% IEEE 8.5" x 11" Proceedings, but most features should be ok. -% -% --------------------------------------------------------------- -% -% `ieee' from BibTeX standard bibliography style `abbrv' -% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. -% Copyright (C) 1985, all rights reserved. -% Copying of this file is authorized only if either -% (1) you make absolutely no changes to your copy, including name, or -% (2) if you do make changes, you name it something other than -% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. -% This restriction helps ensure that all standard styles are identical. -% The file btxbst.doc has the documentation for this style. 
- -ENTRY - { address - author - booktitle - chapter - edition - editor - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - volume - year - } - {} - { label } - -INTEGERS { output.state before.all mid.sentence after.sentence after.block } - -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} - -STRINGS { s t } - -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} - -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} - -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} - -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} - -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} - -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} - -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} - -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {field.or.null} -{ duplicate$ empty$ 
- { pop$ "" } - 'skip$ - if$ -} - -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "{\em " swap$ * "}" * } - if$ -} - -INTEGERS { nameptr namesleft numnames } - -FUNCTION {format.names} -{ 's := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := - nameptr #1 > - { namesleft #1 > - { ", " * t * } - { numnames #2 > - { "," * } - 'skip$ - if$ - t "others" = - { " et~al." * } - { " and " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {format.authors} -{ author empty$ - { "" } - { author format.names } - if$ -} - -FUNCTION {format.editors} -{ editor empty$ - { "" } - { editor format.names - editor num.names$ #1 > - { ", editors" * } - { ", editor" * } - if$ - } - if$ -} - -FUNCTION {format.title} -{ title empty$ - { "" } - { title "t" change.case$ } - if$ -} - -FUNCTION {n.dashify} -{ 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {format.date} -{ year empty$ - { month empty$ - { "" } - { "there's a month but no year in " cite$ * warning$ - month - } - if$ - } - { month empty$ - 'year - { month " " * year * } - if$ - } - if$ -} - -FUNCTION {format.btitle} -{ title emphasize -} - -FUNCTION {tie.or.space.connect} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ * * -} - -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} - -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { "volume" volume tie.or.space.connect - series empty$ - 'skip$ - { " of " * series emphasize * } - if$ - "volume and number" number 
either.or.check - } - if$ -} - -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { output.state mid.sentence = - { "number" } - { "Number" } - if$ - number tie.or.space.connect - series empty$ - { "there's a number but no series in " cite$ * warning$ } - { " in " * series * } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition empty$ - { "" } - { output.state mid.sentence = - { edition "l" change.case$ " edition" * } - { edition "t" change.case$ " edition" * } - if$ - } - if$ -} - -INTEGERS { multiresult } - -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} - -FUNCTION {format.pages} -{ pages empty$ - { "" } - { pages multi.page.check - { "pages" pages n.dashify tie.or.space.connect } - { "page" pages tie.or.space.connect } - if$ - } - if$ -} - -FUNCTION {format.vol.num.pages} -{ volume field.or.null - number empty$ - 'skip$ - { "(" number * ")" * * - volume empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - } - if$ - pages empty$ - 'skip$ - { duplicate$ empty$ - { pop$ format.pages } - { ":" * pages n.dashify * } - if$ - } - if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { "chapter" } - { type "l" change.case$ } - if$ - chapter tie.or.space.connect - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - -FUNCTION {format.in.ed.booktitle} -{ booktitle empty$ - { "" } - { editor empty$ - { "In " booktitle emphasize * } - { "In " format.editors * ", " * booktitle emphasize * } - if$ - } - if$ -} - -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - key empty$ not and - { 
"all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} - -FUNCTION {format.thesis.type} -{ type empty$ - 'skip$ - { pop$ - type "t" change.case$ - } - if$ -} - -FUNCTION {format.tr.number} -{ type empty$ - { "Technical Report" } - 'type - if$ - number empty$ - { "t" change.case$ } - { number tie.or.space.connect } - if$ -} - -FUNCTION {format.article.crossref} -{ key empty$ - { journal empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * - warning$ - "" - } - { "In {\em " journal * "\/}" * } - if$ - } - { "In " key * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - editor num.names$ duplicate$ - #2 > - { pop$ " et~al." * } - { #2 < - 'skip$ - { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = - { " et~al." * } - { " and " * editor #2 "{vv~}{ll}" format.name$ * } - if$ - } - if$ - } - if$ -} - -FUNCTION {format.book.crossref} -{ volume empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - "In " - } - { "Volume" volume tie.or.space.connect - " of " * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { "{\em " * series * "\/}" * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.incoll.inproc.crossref} -{ editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { booktitle empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - "" - } - { "In {\em " booktitle * "\/}" * } - if$ - } - { "In " key * } - if$ - } - { "In " format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref 
missing$ - { journal emphasize "journal" output.check - format.vol.num.pages output - format.date "year" output.check - } - { format.article.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {booklet} -{ output.bibitem - format.authors output - new.block - format.title "title" output.check - howpublished address new.block.checkb - howpublished output - address output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { 
format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.chapter.pages output - new.sentence - publisher "publisher" output.check - address output - format.edition output - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.pages output - address empty$ - { organization publisher new.sentence.checkb - organization output - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - organization output - publisher output - } - if$ - } - { format.incoll.inproc.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {conference} { inproceedings } - -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization empty$ - 'skip$ - { organization output.nonnull - address output - } - if$ - } - { format.authors output.nonnull } - if$ - new.block - format.btitle "title" output.check - author empty$ - { organization empty$ - { address new.block.checka - address output - } - 'skip$ - if$ - } - { organization address new.block.checkb - organization output - address output - } - if$ - format.edition output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - "Master's thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {misc} -{ 
output.bibitem - format.authors output - title howpublished new.block.checkb - format.title output - howpublished new.block.checka - howpublished output - format.date output - new.block - note output - fin.entry - empty.misc.check -} - -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.btitle "title" output.check - new.block - "PhD thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization output } - { format.editors output.nonnull } - if$ - new.block - format.btitle "title" output.check - format.bvolume output - format.number.series output - address empty$ - { editor empty$ - { publisher new.sentence.checka } - { organization publisher new.sentence.checkb - organization output - } - if$ - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - editor empty$ - 'skip$ - { organization output } - if$ - publisher output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - format.tr.number output.nonnull - institution "institution" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - note "note" output.check - format.date output - fin.entry -} - -FUNCTION {default.type} { misc } - -MACRO {jan} {"Jan."} - -MACRO {feb} {"Feb."} - -MACRO {mar} {"Mar."} - -MACRO {apr} {"Apr."} - -MACRO {may} {"May"} - -MACRO {jun} {"June"} - -MACRO {jul} {"July"} - -MACRO {aug} {"Aug."} - -MACRO {sep} {"Sept."} - -MACRO {oct} {"Oct."} - -MACRO {nov} 
{"Nov."} - -MACRO {dec} {"Dec."} - -MACRO {acmcs} {"ACM Comput. Surv."} - -MACRO {acta} {"Acta Inf."} - -MACRO {cacm} {"Commun. ACM"} - -MACRO {ibmjrd} {"IBM J. Res. Dev."} - -MACRO {ibmsj} {"IBM Syst.~J."} - -MACRO {ieeese} {"IEEE Trans. Softw. Eng."} - -MACRO {ieeetc} {"IEEE Trans. Comput."} - -MACRO {ieeetcad} - {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} - -MACRO {ipl} {"Inf. Process. Lett."} - -MACRO {jacm} {"J.~ACM"} - -MACRO {jcss} {"J.~Comput. Syst. Sci."} - -MACRO {scp} {"Sci. Comput. Programming"} - -MACRO {sicomp} {"SIAM J. Comput."} - -MACRO {tocs} {"ACM Trans. Comput. Syst."} - -MACRO {tods} {"ACM Trans. Database Syst."} - -MACRO {tog} {"ACM Trans. Gr."} - -MACRO {toms} {"ACM Trans. Math. Softw."} - -MACRO {toois} {"ACM Trans. Office Inf. Syst."} - -MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} - -MACRO {tcs} {"Theoretical Comput. Sci."} - -READ - -FUNCTION {sortify} -{ purify$ - "l" change.case$ -} - -INTEGERS { len } - -FUNCTION {chop.word} -{ 's := - 'len := - s #1 len substring$ = - { s len #1 + global.max$ substring$ } - 's - if$ -} - -FUNCTION {sort.format.names} -{ 's := - #1 'nameptr := - "" - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { nameptr #1 > - { " " * } - 'skip$ - if$ - s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := - nameptr numnames = t "others" = and - { "et al" * } - { t sortify * } - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {sort.format.title} -{ 't := - "A " #2 - "An " #3 - "The " #4 t chop.word - chop.word - chop.word - sortify - #1 global.max$ substring$ -} - -FUNCTION {author.sort} -{ author empty$ - { key empty$ - { "to sort, need author or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.editor.sort} -{ author empty$ - { editor empty$ - { key empty$ - { "to sort, need author, editor, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ 
- } - { editor sort.format.names } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.organization.sort} -{ author empty$ - { organization empty$ - { key empty$ - { "to sort, need author, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {editor.organization.sort} -{ editor empty$ - { organization empty$ - { key empty$ - { "to sort, need editor, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { editor sort.format.names } - if$ -} - -FUNCTION {presort} -{ type$ "book" = - type$ "inbook" = - or - 'author.editor.sort - { type$ "proceedings" = - 'editor.organization.sort - { type$ "manual" = - 'author.organization.sort - 'author.sort - if$ - } - if$ - } - if$ - " " - * - year field.or.null sortify - * - " " - * - title field.or.null - sort.format.title - * - #1 entry.max$ substring$ - 'sort.key$ := -} - -ITERATE {presort} - -SORT - -STRINGS { longest.label } - -INTEGERS { number.label longest.label.width } - -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} - -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} - -EXECUTE {initialize.longest.label} - -ITERATE {longest.label.pass} - -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * - "}\setlength{\itemsep}{-1ex}\small" * write$ newline$ -} - -EXECUTE {begin.bib} - -EXECUTE {init.state.consts} - -ITERATE {call.type$} - -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} - -EXECUTE {end.bib} - -% end of file ieee.bst -% 
--------------------------------------------------------------- diff --git a/lnet/doc/mpi.fig b/lnet/doc/mpi.fig deleted file mode 100644 index e1a91b5930d3cb193a8b05b9c04ad6beb74dc3db..0000000000000000000000000000000000000000 --- a/lnet/doc/mpi.fig +++ /dev/null @@ -1,117 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 150 1650 900 2025 -4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001 -4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001 --6 -6 150 150 900 525 -4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001 -4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001 --6 -6 2550 4125 3150 4725 -4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001 -4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001 -4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001 --6 -6 1050 1575 1950 1875 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575 -4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001 --6 -6 5400 1575 6300 2175 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575 -4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001 --6 -6 5400 2400 6300 3000 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400 -4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001 --6 -6 1050 2400 1950 2700 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400 -4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001 --6 -6 1050 825 1950 1125 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 825 1950 825 1950 1125 1050 1125 1050 825 -4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 1125 1500 1575 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2025 4050 3375 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 150 675 6600 675 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 
150 1350 6600 1350 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 4125 3300 4125 3300 4725 2400 4725 2400 4125 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 4500 4050 3675 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 1725 5400 1725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2550 5400 2550 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2850 4050 3450 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 1800 1500 2400 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 825 3300 825 3300 1275 2400 1275 2400 825 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 2625 1500 4125 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 300 1500 825 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 975 2400 975 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 1725 2400 1725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 2550 2400 2550 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 4275 2400 4275 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300 -4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001 -4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001 -4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001 -4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001 -4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001 -4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001 -4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001 -4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 
max_offset=\001 -4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - short_len\001 -4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001 -4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001 -4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001 -4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001 -4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001 -4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001 diff --git a/lnet/doc/portals.fig b/lnet/doc/portals.fig deleted file mode 100644 index 9b1271bd65ed6f24e4de38243b250b2381028d78..0000000000000000000000000000000000000000 --- a/lnet/doc/portals.fig +++ /dev/null @@ -1,68 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1350 900 1650 900 1650 1200 1350 1200 1350 900 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 4200 375 4200 2100 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 525 600 1125 600 1125 2100 525 2100 525 600 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3000 1425 4425 1425 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3600 825 3750 825 3750 1125 3600 1125 3600 825 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2025 1425 2550 1425 -2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 4425 750 4875 750 4875 1125 4425 1125 4425 750 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3675 975 4425 975 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2 - 0 0 1.00 60.00 120.00 - 825 1050 1350 1050 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500 - 0.000 
1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1950 1575 1950 1800 1950 1950 2100 1950 2250 1950 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 - 525 975 1125 975 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 - 525 1125 1125 1125 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7 - 0 0 1.00 60.00 120.00 - 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975 - 3600 975 - 0.000 1.000 1.000 1.000 1.000 1.000 0.000 -4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001 -4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001 -4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001 -4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001 -4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001 -4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001 -4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001 -4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001 -4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001 diff --git a/lnet/doc/portals3.bib b/lnet/doc/portals3.bib deleted file mode 100644 index 323b99fa58534eb04406f4e27feb36ab5fdfc8b1..0000000000000000000000000000000000000000 --- a/lnet/doc/portals3.bib +++ /dev/null @@ -1,124 +0,0 @@ -@Article{ Cplant, - title = { {M}assively {P}arallel {C}omputing with - {C}ommodity {C}omponents }, - author = { Ron Brightwell and David S. Greenberg and Arthur - B. 
Maccabe and Rolf Riesen }, - journal = { Parallel Computing }, - volume = { 26 }, - month = { February }, - pages = { 243-266 }, - year = { 2000 } -} - -@Manual{ Portals, - organization = { Sandia National Laboratories }, - title = { {P}uma {P}ortals }, - note = { http://www.cs.sandia.gov/puma/portals }, - year = { 1997 } -} - -@Techreport{ VIA, - title = { {V}irtual {I}nterface {A}rchitecture - {S}pecification {V}ersion 1.0 }, - author = { {Compaq, Microsoft, and Intel} }, - institution = { Compaq, Microsoft, and Intel }, - month = { December }, - year = { 1997 } -} - -@Techreport{ ST, - title = { {I}nformation {T}echnology - {S}cheduled - {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 }, - author = { {Task Group of Technical Committee T11} }, - institution = { Accredited Standards Committee NCITS }, - month = { July }, - year = { 1998 } -} - -@Manual{ TFLOPS, - organization = { Sandia National Laboratories }, - title = { ASCI Red }, - note = { http://www.sandia.gov/ASCI/TFLOP }, - year = { 1996 } -} - -@Techreport{ GM, - title = { The {GM} {M}essage {P}assing {S}ystem }, - author = { {Myricom, Inc.} }, - institution = { {Myricom, Inc.} }, - year = { 1997 }, -} - -@Article{ MPIstandard, - title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard }, - author = { {Message Passing Interface Forum} }, - journal = { The International Journal of Supercomputer Applications - and High Performance Computing }, - volume = { 8 }, - year = { 1994 } -} - -@Inproceedings{ PumaOS, - author = "Lance Shuler and Chu Jong and Rolf Riesen and - David van Dresser and Arthur B. Maccabe and - Lee Ann Fisk and T. Mack Stallcup", - booktitle = "Proceeding of the 1995 Intel Supercomputer - User's Group Conference", - title = "The {P}uma Operating System for Massively Parallel Computers", - organization = "Intel Supercomputer User's Group", - year = 1995 -} - -@InProceedings{ SUNMOS, -author = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and - Stephen R. 
Wheat", -title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide", -booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994 - Annual North America Users' Conference.", -year = 1994, -pages = "245--251", -month = "June", -location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps" -} - -@InProceedings { PumaMPI, - title = { Design and Implementation of {MPI} on {P}uma Portals }, - author = { Ron Brightwell and Lance Shuler }, - booktitle = { Proceedings of the Second MPI Developer's Conference }, - pages = { 18-25 }, - month = { July }, - year = { 1996 } -} - -@Inproceedings{ FM2, - author = { Mario Lauria and Scott Pakin and Andrew Chien }, - title = { {E}fficient {L}ayering for {H}igh {S}peed - {C}ommunication: {F}ast {M}essages 2.x }, - Booktitle = { Proceedings of the IEEE International Symposium - on High Performance Distributed Computing }, - year = { 1998 } -} - -@Manual { CraySHMEM, - title = "SHMEM Technical Note for C, SG-2516 2.3", - organization = "Cray Research, Inc.", - month = "October", - year = 1994 -} - -@Manual { MPI2, - title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface", - organization = "Message Passing Interface Forum", - note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html", - month = "July", - year = 1997 -} - -@InProceedings { PMMPI, - title = { {The Design and Implementation of Zero Copy MPI Using - Commodity Hardware with a High Performance Network} }, - author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori - and Yutaka Ishikawa }, - booktitle = { Proceedings of the ICS }, - year = { 1998 } -} diff --git a/lnet/doc/portals3.lyx b/lnet/doc/portals3.lyx deleted file mode 100644 index 84292807e2da8bceb0139bdf52242e0b002b1505..0000000000000000000000000000000000000000 --- a/lnet/doc/portals3.lyx +++ /dev/null @@ -1,15944 +0,0 @@ -#LyX 1.2 created this file. 
For more info see http://www.lyx.org/ -\lyxformat 220 -\textclass report -\begin_preamble -\usepackage{fullpage} -\renewenvironment{comment}% -{\begin{quote}\textbf{Discussion}: \slshape}% -{\end{quote}} -\pagestyle{myheadings} -\end_preamble -\language american -\inputencoding auto -\fontscheme pslatex -\graphics default -\paperfontsize 10 -\spacing single -\papersize letterpaper -\paperpackage a4 -\use_geometry 0 -\use_amsmath 0 -\use_natbib 0 -\use_numerical_citations 0 -\paperorientation portrait -\secnumdepth 2 -\tocdepth 2 -\paragraph_separation indent -\defskip medskip -\quotes_language english -\quotes_times 2 -\papercolumns 1 -\papersides 2 -\paperpagestyle headings - -\layout Title - -The Portals 3.2 Message Passing Interface -\newline - Revision 1.1 -\layout Author - -Ron Brightwell -\begin_inset Foot -collapsed true - -\layout Standard - -R. - Brightwell and R. - Riesen are with the Scalable Computing Systems Department, Sandia National - Laboratories, P.O. - Box 5800, Albuquerque, NM\SpecialChar ~ -\SpecialChar ~ -87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov. -\end_inset - -, Arthur B. - Maccabe -\begin_inset Foot -collapsed true - -\layout Standard - -A. - B. - Maccabe is with the Computer Science Department, University of New Mexico, - Albuquerque, NM\SpecialChar ~ -\SpecialChar ~ -87131-1386, maccabe@cs.unm.edu. -\end_inset - -, Rolf Riesen and Trammell Hudson -\layout Abstract - -This report presents a specification for the Portals 3.2 message passing - interface. - Portals 3.2 is intended to allow scalable, high-performance network communicatio -n between nodes of a parallel computing system. - Specifically, it is designed to support a parallel computing platform composed - of clusters of commodity workstations connected by a commodity system area - network fabric. - In addition, Portals 3.2 is well suited to massively parallel processing - and embedded systems. 
- Portals 3.2 represents an adaption of the data movement layer developed - for massively parallel processing platforms, such as the 4500-node Intel - TeraFLOPS machine. - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -clearpage -\backslash -pagenumbering{roman} -\backslash -setcounter{page}{3} -\end_inset - - -\layout Standard - - -\begin_inset LatexCommand \tableofcontents{} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Standard - - -\begin_inset FloatList figure - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Standard - - -\begin_inset FloatList table - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Chapter* - -Summary of Changes for Revision 1.1 -\layout Enumerate - -Updated version number to 3.2 throughout the document -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sub:PtlGetId} - -\end_inset - -: added -\family typewriter -PTL_SEGV -\family default - to error list for -\shape italic -PtlGetId -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -: added -\family typewriter -PTL_ML_TOOLONG -\family default - to error list for -\shape italic -PtlMEAttach -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meunlink} - -\end_inset - -: removed text referring to a list of associated memory descriptors. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: added text to describe unlinking a free-floating memory descriptor. 
-\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - -: added entry for -\family typewriter -ptl_seq_t -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -added definition of -\family typewriter -max_offset -\family default -. -\layout Enumerate - -added text to clarify -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default -. -\end_deeper -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: modified text for -\family typewriter -unlink_op -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -: added text to clarify multiple calls to -\shape italic -PtlNIInit -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: added text to clarify -\family typewriter -unlink_nofit -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:receiving} - -\end_inset - -: removed text indicating that an MD will reject a message if the associated - EQ is full. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: added -\family typewriter -PTL_MD_INUSE -\family default - error code and text to indicate that only MDs with no pending operations - can be unlinked. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - -: added -\family typewriter -PTL_MD_INUSE -\family default - return code. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - -: added user id field, MD handle field, and NI specific failure field to - the -\family typewriter -ptl_event_t -\family default - structure. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - -: added -\family typewriter -ptl_ni_fail_t -\family default -. 
-\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - -: added -\family typewriter -PTL_EVENT_UNLINK -\family default - event type. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - -: removed -\shape slanted -PtlTransId -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, Section -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - -, Section -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -: listed allowable constants with relevant fields. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - -: added -\shape italic -PtlMEAttachAny -\shape default - function. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - -: added -\family typewriter -PTL_PT_FULL -\family default - return code for -\shape italic -PtlMEAttachAny -\shape default -. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - -: updated to reflect new event types. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - -: added -\family typewriter -ptl_nid_t -\family default -, -\family typewriter -ptl_pid_t -\family default -, and -\family typewriter -ptl_uid_t -\family default -. -\layout Chapter* - -Summary of Changes for Version 3.1 -\layout Section* - -Thread Issues -\layout Standard - -The most significant change to the interface from version 3.0 to 3.1 involves - the clarification of how the interface interacts with multi-threaded applicatio -ns. - We adopted a generic thread model in which processes define an address - space and threads share the address space. 
- Consideration of the API in the light of threads led to several clarifications - throughout the document: -\layout Enumerate - -Glossary: -\begin_deeper -\layout Enumerate - -added a definition for -\emph on -thread -\emph default -, -\layout Enumerate - -reworded the definition for -\emph on -process -\emph default -. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:apiover} - -\end_inset - -: added section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:threads} - -\end_inset - - to describe the multi-threading model used by the Portals API. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ptlinit} - -\end_inset - -: -\emph on -PtlInit -\emph default - must be called at least once and may be called any number of times. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ptlfini} - -\end_inset - -: -\emph on -PtlFini -\emph default - should be called once as the process is terminating and not as each thread - terminates. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - -: Portals does not define thread ids. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - -: network interfaces are associated with processes, not threads. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -: -\emph on -PtlNIInit -\emph default - must be called at least once and may be called any number of times. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:eqget} - -\end_inset - -: -\emph on -PtlEQGet -\emph default - returns -\family typewriter -PTL_EQ_EMPTY -\family default - if a thread is blocked on -\emph on -PtlEQWait -\emph default -.
- -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:eqwait} - -\end_inset - -: waiting threads are awakened in FIFO order. - -\layout Standard - -Two functions, -\emph on -PtlNIBarrier -\emph default - and -\emph on -PtlEQCount -\emph default - were removed from the API. - -\emph on -PtlNIBarrier -\emph default - was defined to block the calling process until all of the processes in - the application group had invoked -\emph on -PtlNIBarrier -\emph default -. - We now consider this functionality, along with the concept of groups (see - the discussion under -\begin_inset Quotes eld -\end_inset - -other changes -\begin_inset Quotes erd -\end_inset - -), to be part of the runtime system, not part of the Portals API. - -\emph on -PtlEQCount -\emph default - was defined to return the number of events in an event queue. - Because external operations may lead to new events being added and other - threads may remove events, the value returned by -\emph on -PtlEQCount -\emph default - would have to be a hint about the number of events in the event queue. -\layout Section* - -Handling small, unexpected messages -\layout Standard - -Another set of changes relates to handling small unexpected messages in - MPI. - In designing version 3.0, we assumed that each unexpected message would - be placed in a unique memory descriptor. - To avoid the need to process a long list of memory descriptors, we moved - the memory descriptors out of the match list and hung them off of a single - match list entry. - In this way, large unexpected messages would only encounter a single -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - match list entry before encountering the -\begin_inset Quotes eld -\end_inset - -long message -\begin_inset Quotes erd -\end_inset - - match list entry. - Experience with this strategy identified resource management problems with - this approach. 
- In particular, a long sequence of very short (or zero length) messages - could quickly exhaust the memory descriptors constructed for handling unexpecte -d messages. - Our new strategy involves the use of several very large memory descriptors - for small unexpected messages. - Consecutive unexpected messages will be written into the first of these - memory descriptors until the memory descriptor fills up. - When the first of the -\begin_inset Quotes eld -\end_inset - -small memory -\begin_inset Quotes erd -\end_inset - - descriptors fills up, it will be unlinked and subsequent short messages - will be written into the next -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - memory descriptor. - In this case, a -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - memory descriptor will be declared full when it does not have sufficient - space for the largest small unexpected message. -\layout Standard - -This led to two significant changes. - First, each match list entry now has a single memory descriptor rather - than a list of memory descriptors. - Second, in addition to exceeding the operation threshold, a memory descriptor - can be unlinked when the local offset exceeds a specified value. - These changes have led to several changes in this document: -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{subsec:paddress} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -removed references to the memory descriptor list, -\layout Enumerate - -changed the portals address translation description to indicate that unlinking - a memory descriptor implies unlinking the associated match list entry--match - list entries can no longer be unlinked independently from the memory descriptor.
- -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -removed unlink from argument list, -\layout Enumerate - -removed description of -\family typewriter -ptl_unlink -\family default - type, -\layout Enumerate - -changed wording of the error condition when the Portal table index already - has an associated match list. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - -: removed unlink from argument list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - -: added -\family typewriter -max_offset -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -added description of -\family typewriter -ptl_unlink -\family default - type, -\layout Enumerate - -removed reference to memory descriptor lists, -\layout Enumerate - -changed wording of the error condition when match list entry already has - an associated memory descriptor, -\layout Enumerate - -changed the description of the -\family typewriter -unlink -\family default - argument. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -: removed -\family typewriter -PtlMDInsert -\family default - operation. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - -: removed references to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: removed reference to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:summary} - -\end_inset - -: removed references to PtlMDInsert. 
- -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:semantics} - -\end_inset - -: removed reference to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:exmpi} - -\end_inset - -: revised the MPI example to reflect the changes to the interface. - -\layout Standard - -Several changes have been made to improve the general documentation of the - interface. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - -: documented the special value -\family typewriter -PTL_EQ_NONE -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - -: documented the special value -\family typewriter -PTL_ID_ANY -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - -: documented the return value -\family typewriter -PTL_INV_EQ -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - -: clarified the description of the -\emph on -PtlMDUpdate -\emph default - function. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:implvals} - -\end_inset - -: introduced a new section to document the implementation defined values. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:summary} - -\end_inset - -: modified Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - - to indicate where each constant is introduced and where it is used. - -\layout Section* - -Other changes -\layout Subsection* - -Implementation defined limits (Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -) -\layout Standard - -The earlier version provided implementation defined limits for the maximum - number of match entries, the maximum number of memory descriptors, etc. 
- Rather than spanning the entire implementation, these limits are now associated - with individual network interfaces. -\layout Subsection* - -Added User Ids (Section -\begin_inset LatexCommand \ref{sec:uid} - -\end_inset - -) -\layout Standard - -Group Ids had been used to simplify access control entries. - In particular, a process could allow access for all of the processes in - a group. - User Ids have been introduced to regain this functionality. - We use user ids to fill this role. -\layout Subsection* - -Removed Group Ids and Rank Ids (Section -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - -) -\layout Standard - -The earlier version of Portals had two forms for addressing processes: <node - id, process id> and <group id, rank id>. - A process group was defined as the collection of processes created during - application launch. - Each process in the group was given a unique rank id in the range 0 to - -\begin_inset Formula $n-1$ -\end_inset - - where -\begin_inset Formula $n$ -\end_inset - - was the number of processes in the group. - We removed groups because they are better handled in the runtime system. -\layout Subsection* - -Match lists (Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -) -\layout Standard - -It is no longer illegal to have an existing match entry when calling PtlMEAttach. - A position argument was added to the list of arguments supplied to -\emph on -PtlMEAttach -\emph default - to specify whether the new match entry is prepended or appended to the - existing list. - If there is no existing match list, the position argument is ignored. -\layout Subsection* - -Unlinking Memory Descriptors (Section -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -) -\layout Standard - -Previously, a memory descriptor could be unlinked if the offset exceeded - a threshold upon the completion of an operation.
- In this version, the unlinking is delayed until there is a matching operation - which requires more memory than is currently available in the descriptor. - In addition to changes in section, this led to a revision of Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:flow} - -\end_inset - -. -\layout Subsection* - -Split Phase Operations and Events (Section -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - -) -\layout Standard - -Previously, there were five types of events: -\family typewriter -PTL_EVENT_PUT -\family default -, -\family typewriter -PTL_EVENT_GET -\family default -, -\family typewriter -PTL_EVENT_REPLY -\family default -, -\family typewriter -PTL_EVENT_SENT -\family default -, and -\family typewriter -PTL_EVENT_ACK. - -\family default -The first four of these reflected the completion of potentially long operations. - We have introduced new event types to reflect the fact that long operations - have a distinct starting point and a distinct completion point. - Moreover, the completion may be successful or unsuccessful. -\layout Standard - -In addition to providing a mechanism for reporting failure to higher levels - of software, this split provides an opportunity for improved ordering - semantics. - Previously, if one process initiated two operations (e.g., two put operations) - on a remote process, these operations were guaranteed to complete in the - same order that they were initiated. - Now, we only guarantee that the initiation events are delivered in the - same order. - In particular, the operations do not need to complete in the order that - they were initiated. -\layout Subsection* - -Well known process ids (Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -) -\layout Standard - -To support the notion of -\begin_inset Quotes eld -\end_inset - -well known process ids, -\begin_inset Quotes erd -\end_inset - - we added a process id argument to the arguments for PtlNIInit.
-\layout Chapter* - -Glossary -\layout Description - -API Application Programming Interface. - A definition of the functions and semantics provided by a library of functions. - -\layout Description - -Initiator A -\emph on -process -\emph default - that initiates a message operation. - -\layout Description - -Message An application-defined unit of data that is exchanged between -\emph on -processes -\emph default -. - -\layout Description - -Message\SpecialChar ~ -Operation Either a put operation, which writes data, or a get operation, - which reads data. - -\layout Description - -Network A network provides point-to-point communication between -\emph on -nodes -\emph default -. - Internally, a network may provide multiple routes between endpoints (to - improve fault tolerance or to improve performance characteristics); however, - multiple paths will not be exposed outside of the network. - -\layout Description - -Node A node is an endpoint in a -\emph on -network -\emph default -. - Nodes provide processing capabilities and memory. - A node may provide multiple processors (an SMP node) or it may act as a - -\emph on -gateway -\emph default - between networks. - -\layout Description - -Process A context of execution. - A process defines a virtual memory (VM) context. - This context is not shared with other processes. - Several threads may share the VM context defined by a process. - -\layout Description - -Target A -\emph on -process -\emph default - that is acted upon by a message operation. - -\layout Description - -Thread A context of execution that shares a VM context with other threads.
- -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\layout Standard - -\backslash -setcounter{page}{1} -\backslash -pagenumbering{arabic} -\end_inset - - -\layout Chapter - -Introduction -\begin_inset LatexCommand \label{sec:intro} - -\end_inset - - -\layout Section - -Overview -\layout Standard - -This document describes an application programming interface for message - passing between nodes in a system area network. - The goal of this interface is to improve the scalability and performance - of network communication by defining the functions and semantics of message - passing required for scaling a parallel computing system to ten thousand - nodes. - This goal is achieved by providing an interface that will allow a quality - implementation to take advantage of the inherently scalable design of Portals. -\layout Standard - -This document is divided into several sections: -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:intro} - -\end_inset - ----Introduction This section describes the purpose and scope of the Portals - API. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:apiover} - -\end_inset - ----An\SpecialChar ~ -Overview\SpecialChar ~ -of\SpecialChar ~ -the\SpecialChar ~ -Portals\SpecialChar ~ -3.2\SpecialChar ~ -API This section gives a brief overview of the - Portals API. - The goal is to introduce the key concepts and terminology used in the descripti -on of the API. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:api} - -\end_inset - ----The\SpecialChar ~ -Portals\SpecialChar ~ -3.2\SpecialChar ~ -API This section describes the functions and semantics of - the Portals application programming interface.
- -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:semantics} - -\end_inset - ----The\SpecialChar ~ -Semantics\SpecialChar ~ -of\SpecialChar ~ -Message\SpecialChar ~ -Transmission This section describes the semantics - of message transmission. - In particular, the information transmitted in each type of message and - the processing of incoming messages. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:examples} - -\end_inset - ----Examples This section presents several examples intended to illustrate - the use of the Portals API. - -\layout Section - -Purpose -\layout Standard - -Existing message passing technologies available for commodity cluster networking - hardware do not meet the scalability goals required by the Cplant\SpecialChar ~ - -\begin_inset LatexCommand \cite{Cplant} - -\end_inset - - project at Sandia National Laboratories. - The goal of the Cplant project is to construct a commodity cluster that - can scale to the order of ten thousand nodes. - This number greatly exceeds the capacity for which existing message passing - technologies have been designed and implemented. -\layout Standard - -In addition to the scalability requirements of the network, these technologies - must also be able to support a scalable implementation of the Message Passing - Interface (MPI)\SpecialChar ~ - -\begin_inset LatexCommand \cite{MPIstandard} - -\end_inset - - standard, which has become the -\shape italic -de facto -\shape default - standard for parallel scientific computing. - While MPI does not impose any scalability limitations, existing message - passing technologies do not provide the functionality needed to allow implement -ations of MPI to meet the scalability requirements of Cplant.
-\layout Standard - -The following are properties of a network architecture that do not impose - any inherent scalability limitations: -\layout Itemize - -Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~ - -\begin_inset LatexCommand \cite{VIA} - -\end_inset - - and TCP/IP sockets, have limitations on the number of peer connections - that can be established. - -\layout Itemize - -Network independence - Many communication systems depend on the host processor - to perform operations in order for messages in the network to be consumed. - Message consumption from the network should not be dependent on host processor - activity, such as the operating system scheduler or user-level thread scheduler. - -\layout Itemize - -User-level flow control - Many communication systems manage flow control - internally to avoid depleting resources, which can significantly impact - performance as the number of communicating processes increases. - -\layout Itemize - -OS Bypass - High performance network communication should not involve memory - copies into or out of a kernel-managed protocol stack. - -\layout Standard - -The following are properties of a network architecture that do not impose - scalability limitations for an implementation of MPI: -\layout Itemize - -Receiver-managed - Sender-managed message passing implementations require - a persistent block of memory to be available for every process, requiring - memory resources to increase with job size and requiring user-level flow - control mechanisms to manage these resources. - -\layout Itemize - -User-level Bypass - While OS Bypass is necessary for high-performance, it - alone is not sufficient to support the Progress Rule of MPI asynchronous - operations. - -\layout Itemize - -Unexpected messages - Few communication systems have support for receiving - messages for which there is no prior notification. 
- Support for these types of messages is necessary to avoid flow control - and protocol overhead. - -\layout Section - -Background -\layout Standard - -Portals was originally designed for and implemented on the nCube machine - as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~ - -\begin_inset LatexCommand \cite{SUNMOS} - -\end_inset - - and Puma\SpecialChar ~ - -\begin_inset LatexCommand \cite{PumaOS} - -\end_inset - - lightweight kernel development projects. - Portals went through two design phases, the latter of which is used on - the 4500-node Intel TeraFLOPS machine\SpecialChar ~ - -\begin_inset LatexCommand \cite{TFLOPS} - -\end_inset - -. - Portals have been very successful in meeting the needs of such a large - machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~ - -\begin_inset LatexCommand \cite{PumaMPI} - -\end_inset - -, but also for implementing the scalable run-time environment and parallel - I/O capabilities of the machine. -\layout Standard - -The second generation Portals implementation was designed to take full advantage - of the hardware architecture of large MPP machines. - However, efforts to implement this same design on commodity cluster technology - identified several limitations, due to the differences in network hardware - as well as to shortcomings in the design of Portals. -\layout Section - -Scalability -\layout Standard - -The primary goal in the design of Portals is scalability. - Portals are designed specifically for an implementation capable of supporting - a parallel job running on tens of thousands of nodes. - Performance is critical only in terms of scalability. - That is, the level of message passing performance is characterized by how - far it allows an application to scale and not by how it performs in micro-bench -marks (e.g., a two node bandwidth or latency test). -\layout Standard - -The Portals API is designed to allow for scalability, not to guarantee it. 
- Portals cannot overcome the shortcomings of a poorly designed application - program. - Applications that have inherent scalability limitations, either through - design or implementation, will not be transformed by Portals into scalable - applications. - Scalability must be addressed at all levels. - Portals do not inhibit scalability, but do not guarantee it either. -\layout Standard - -To support scalability, the Portals interface maintains a minimal amount - of state. - Portals provide reliable, ordered delivery of messages between pairs of - processes. - They are connectionless: a process is not required to explicitly establish - a point-to-point connection with another process in order to communicate. - Moreover, all buffers used in the transmission of messages are maintained - in user space. - The target process determines how to respond to incoming messages, and - messages for which there are no buffers are discarded. -\layout Section - -Communication Model -\layout Standard - -Portals combine the characteristics of both one-sided and two-sided communication. - They define a -\begin_inset Quotes eld -\end_inset - -matching put -\begin_inset Quotes erd -\end_inset - - operation and a -\begin_inset Quotes eld -\end_inset - -matching get -\begin_inset Quotes erd -\end_inset - - operation. - The destination of a put (or send) is not an explicit address; instead, - each message contains a set of match bits that allow the receiver to determine - where incoming messages should be placed. - This flexibility allows Portals to support both traditional one-sided operation -s and two-sided send/receive operations. -\layout Standard - -Portals allow the target to determine whether incoming messages are acceptable. - A target process can choose to accept message operations from any specific - process or can choose to ignore message operations from any specific process.
-\layout Section - -Zero Copy, OS Bypass and Application Bypass -\layout Standard - -In traditional system architectures, network packets arrive at the network - interface card (NIC), are passed through one or more protocol layers in - the operating system, and eventually copied into the address space of the - application. - As network bandwidth began to approach memory copy rates, reduction of - memory copies became a critical concern. - This concern led to the development of zero-copy message passing protocols - in which message copies are eliminated or pipelined to avoid the loss of - bandwidth. -\layout Standard - -A typical zero-copy protocol has the NIC generate an interrupt for the CPU - when a message arrives from the network. - The interrupt handler then controls the transfer of the incoming message - into the address space of the appropriate application. - The interrupt latency, the time from the initiation of an interrupt until - the interrupt handler is running, is fairly significant. - To avoid this cost, some modern NICs have processors that can be programmed - to implement part of a message passing protocol. - Given a properly designed protocol, it is possible to program the NIC to - control the transfer of incoming messages, without needing to interrupt - the CPU. - Because this strategy does not need to involve the OS on every message - transfer, it is frequently called -\begin_inset Quotes eld -\end_inset - -OS Bypass. -\begin_inset Quotes erd -\end_inset - - ST\SpecialChar ~ - -\begin_inset LatexCommand \cite{ST} - -\end_inset - -, VIA\SpecialChar ~ - -\begin_inset LatexCommand \cite{VIA} - -\end_inset - -, FM\SpecialChar ~ - -\begin_inset LatexCommand \cite{FM2} - -\end_inset - -, GM\SpecialChar ~ - -\begin_inset LatexCommand \cite{GM} - -\end_inset - -, and Portals are examples of OS Bypass protocols.
-\layout Standard - -Many protocols that support OS Bypass still require that the application - actively participate in the protocol to ensure progress. - As an example, the long message protocol of PM requires that the application - receive and reply to a request to put or get a long message. - This complicates the runtime environment, requiring a thread to process - incoming requests, and significantly increases the latency required to - initiate a long message protocol. - The Portals message passing protocol does not require activity on the part - of the application to ensure progress. - We use the term -\begin_inset Quotes eld -\end_inset - -Application Bypass -\begin_inset Quotes erd -\end_inset - - to refer to this aspect of the Portals protocol. -\layout Section - -Faults -\layout Standard - -Given the number of components that we are dealing with and the fact that - we are interested in supporting applications that run for very long times, - failures are inevitable. - The Portals API recognizes that the underlying transport may not be able - to successfully complete an operation once it has been initiated. - This is reflected in the fact that the Portals API reports three types - of events: events indicating the initiation of an operation, events indicating - the successful completion of an operation, and events indicating the unsuccessf -ul completion of an operation. - Every initiation event is eventually followed by a successful completion - event or an unsuccessful completion event. -\layout Standard - -Between the time an operation is started and the time that the operation - completes (successfully or unsuccessfully), any memory associated with - the operation should be considered volatile. - That is, the memory may be changed in unpredictable ways while the operation - is progressing. - Once the operation completes, the memory associated with the operation - will not be subject to further modification (from this operation). 
- Notice that unsuccessful operations may alter memory in an essentially - unpredictable fashion. -\layout Chapter - -An Overview of the Portals API -\begin_inset LatexCommand \label{sec:apiover} - -\end_inset - - -\layout Standard - -In this section, we give a conceptual overview of the Portals API. - The goal is to provide a context for understanding the detailed description - of the API presented in the next section. -\layout Section - -Data Movement -\begin_inset LatexCommand \label{sec:dmsemantics} - -\end_inset - - -\layout Standard - -A Portal represents an opening in the address space of a process. - Other processes can use a Portal to read (get) or write (put) the memory - associated with the portal. - Every data movement operation involves two processes, the -\series bold -initiator -\series default - and the -\series bold -target -\series default -. - The initiator is the process that initiates the data movement operation. - The target is the process that responds to the operation by either accepting - the data for a put operation, or replying with the data for a get operation. -\layout Standard - -In this discussion, activities attributed to a process may refer to activities - that are actually performed by the process or -\emph on -on behalf of the process -\emph default -. - The inclusiveness of our terminology is important in the context of -\emph on -application bypass -\emph default -. - In particular, when we note that the target sends a reply in the case of - a get operation, it is possible that reply will be generated by another - component in the system, bypassing the application. -\layout Standard - -Figures\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:put} - -\end_inset - - and -\begin_inset LatexCommand \ref{fig:get} - -\end_inset - - present graphical interpretations of the Portal data movement operations: - put and get. - In the case of a put operation, the initiator sends a put request message - containing the data to the target. 
- The target translates the Portal addressing information in the request - using its local Portal structures. - When the request has been processed, the target optionally sends an acknowledge -ment message. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename put.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 218pt - lyxheight 119pt -\end_inset - - -\layout Caption - -Portal Put (Send) -\begin_inset LatexCommand \label{fig:put} - -\end_inset - - -\end_inset - - -\layout Standard - -In the case of a get operation, the initiator sends a get request to the - target. - As with the put operation, the target translates the Portal addressing - information in the request using its local Portal structures. - Once it has translated the Portal addressing information, the target sends - a reply that includes the requested data. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename get.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 218pt - lyxheight 119pt -\end_inset - - -\layout Caption - -Portal Get -\begin_inset LatexCommand \label{fig:get} - -\end_inset - - -\end_inset - - -\layout Standard - -We should note that Portal address translations are only performed on nodes - that respond to operations initiated by other nodes. - Acknowledgements and replies to get operations bypass the portals address - translation structures. 
-\layout Section - -Portal Addressing -\begin_inset LatexCommand \label{subsec:paddress} - -\end_inset - - -\layout Standard - -One-sided data movement models (e.g., shmem\SpecialChar ~ - -\begin_inset LatexCommand \cite{CraySHMEM} - -\end_inset - -, ST\SpecialChar ~ - -\begin_inset LatexCommand \cite{ST} - -\end_inset - -, MPI-2\SpecialChar ~ - -\begin_inset LatexCommand \cite{MPI2} - -\end_inset - -) typically use a triple to address memory on a remote node. - This triple consists of a process id, memory buffer id, and offset. - The process id identifies the target process, the memory buffer id specifies - the region of memory to be used for the operation, and the offset specifies - an offset within the memory buffer. -\layout Standard - -In addition to the standard address components (process id, memory buffer - id, and offset), a Portal address includes a set of match bits. - This addressing model is appropriate for supporting one-sided operations - as well as traditional two-sided message passing operations. - Specifically, the Portals API provides the flexibility needed for an efficient - implementation of MPI-1, which defines two-sided operations with one-sided - completion semantics. -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:portals} - -\end_inset - - presents a graphical representation of the structures used by a target - in the interpretation of a Portal address. - The process id is used to route the message to the appropriate node and - is not reflected in this diagram. - The memory buffer id, called the -\series bold -portal id -\series default -, is used as an index into the Portal table. - Each element of the Portal table identifies a match list. 
- Each element of the match list specifies two bit patterns: a set of -\begin_inset Quotes eld -\end_inset - -don't care -\begin_inset Quotes erd -\end_inset - - bits, and a set of -\begin_inset Quotes eld -\end_inset - -must match -\begin_inset Quotes erd -\end_inset - - bits. - In addition to the two sets of match bits, each match list element has - at most one memory descriptor. - Each memory descriptor identifies a memory region and an optional event - queue. - The memory region specifies the memory to be used in the operation and - the event queue is used to record information about these operations. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename portals.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 305pt - lyxheight 106pt -\end_inset - - -\layout Caption - -Portal Addressing Structures -\begin_inset LatexCommand \label{fig:portals} - -\end_inset - - -\end_inset - - -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:flow} - -\end_inset - - illustrates the steps involved in translating a Portal address, starting - from the first element in a match list. - If the match criteria specified in the match list entry are met and the - memory descriptor list accepts the operation -\begin_inset Foot -collapsed true - -\layout Standard - -Memory descriptors can reject operations because a threshold has been exceeded - or because the memory region does not have sufficient space, see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset - -, the operation (put or get) is performed using the memory region specified - in the memory descriptor. 
- If the memory descriptor specifies that it is to be unlinked when a threshold - has been exceeded, the match list entry is removed from the match list - and the resources associated with the memory descriptor and match list - entry are reclaimed. - Finally, if there is an event queue specified in the memory descriptor, - the operation is logged in the event queue. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename flow_new.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 447pt - lyxheight 282pt -\end_inset - - -\layout Caption - -Portals Address Translation -\begin_inset LatexCommand \label{fig:flow} - -\end_inset - - -\end_inset - - -\layout Standard - -If the match criteria specified in the match list entry are not met, or - there is no memory descriptor associated with the match list entry, or - the memory descriptor associated with the match list entry rejects the - operation, the address translation continues with the next match list entry. - If the end of the match list has been reached, the address translation - is aborted and the incoming request is discarded. -\layout Section - -Access Control -\layout Standard - -A process can control access to its portals using an access control list. - Each entry in the access control list specifies a process id and a Portal - table index. - The access control list is actually an array of entries. - Each incoming request includes an index into the access control list (i.e., - a -\begin_inset Quotes eld -\end_inset - -cookie -\begin_inset Quotes erd -\end_inset - - or hint). - If the id of the process issuing the request doesn't match the id specified - in the access control list entry or the Portal table index specified in - the request doesn't match the Portal table index specified in the access - control list entry, the request is rejected.
- Process identifiers and Portal table indexes may include wild card values - to increase the flexibility of this mechanism. - -\layout Standard - -Two aspects of this design merit further discussion. - First, the model assumes that the information in a message header, the - sender's id in particular, is trustworthy. - In most contexts, we assume that the entity that constructs the header - is trustworthy; however, using cryptographic techniques, we could easily - devise a protocol that would ensure the authenticity of the sender. -\layout Standard - -Second, because the access check is performed by the receiver, it is possible - that a malicious process will generate thousands of messages that will - be denied by the receiver. - This could saturate the network and/or the receiver, resulting in a -\emph on -denial of service -\emph default - attack. - Moving the check to the sender using capabilities, would remove the potential - for this form of attack. - However, the solution introduces the complexities of capability management - (exchange of capabilities, revocation, protections, etc). -\layout Section - -Multi-threaded Applications -\begin_inset LatexCommand \label{sec:threads} - -\end_inset - - -\layout Standard - -The Portals API supports a generic view of multi-threaded applications. - From the perspective of the Portals API, an application program is defined - by a set of processes. - Each process defines a unique address space. - The Portals API defines access to this address space from other processes - (using portals addressing and the data movement operations). - A process may have one or more -\emph on -threads -\emph default - executing in its address space. - -\layout Standard - -With the exception of -\emph on -PtlEQWait -\emph default - every function in the Portals API is non-blocking and atomic with respect - to both other threads and external operations that result from data movement - operations. 
- While individual operations are atomic, sequences of these operations may - be interleaved between different threads and with external operations. - The Portals API does not provide any mechanisms to control this interleaving. - It is expected that these mechanisms will be provided by the API used to - create threads. -\layout Chapter - -The Portals API -\begin_inset LatexCommand \label{sec:api} - -\end_inset - - -\layout Section - -Naming Conventions -\begin_inset LatexCommand \label{sec:conv} - -\end_inset - - -\layout Standard - -The Portals API defines two types of entities: functions and types. - Functions always start with -\emph on -Ptl -\emph default - and use mixed upper and lower case. - When used in the body of this report, function names appear in italic face, - e.g., -\emph on -PtlInit -\emph default -. - The functions associated with an object type will have names that start - with -\emph on -Ptl -\emph default -, followed by the two letter object type code shown in Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:objcodes} - -\end_inset - -. - As an example, the function -\emph on -PtlEQAlloc -\emph default - allocates resources for an event queue.
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Object Type Codes -\begin_inset LatexCommand \label{tab:objcodes} - -\end_inset - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\newline - -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\emph on -xx -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Section -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -EQ -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Event Queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - MD -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Memory Descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> 
-</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - ME -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Match list Entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - NI -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Network Interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Type names use lower case with underscores to separate words. - Each type name starts with -\family typewriter -ptl -\family default -_ and ends with -\family typewriter -_t -\family default -. - When used in the body of this report, type names appear in a fixed font, - e.g., -\family typewriter -ptl_match_bits_t -\family default -. -\layout Standard - -Names for constants use upper case with underscores to separate words. - Each constant name starts with -\family typewriter -PTL_ -\family default -. - When used in the body of this report, constant names appear in a fixed font, - e.g., -\family typewriter -PTL_OK -\family default -. -\layout Section - -Base Types -\layout Standard - -The Portals API defines a variety of base types. - These types represent a simple renaming of the base types provided by the - C programming language.
- In most cases these new type names have been introduced to improve type - safety and to avoid issues arising from differences in representation sizes - (e.g., 16-bit or 32-bit integers). -\layout Subsection - -Sizes -\begin_inset LatexCommand \label{sec:size-t} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_size_t -\family default - is an unsigned 64-bit integral type used for representing sizes. -\layout Subsection - -Handles -\begin_inset LatexCommand \label{sec:handle-type} - -\end_inset - - -\layout Standard - -Objects maintained by the API are accessed through handles. - Handle types have names of the form -\family typewriter -ptl_handle_ -\emph on -xx -\emph default -_t -\family default -, where -\emph on -xx -\emph default - is one of the two letter object type codes shown in Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:objcodes} - -\end_inset - -. - For example, the type -\family typewriter -ptl_handle_ni_t -\family default - is used for network interface handles. -\layout Standard - -Each type of object is given a unique handle type to enhance type checking. - The type, -\family typewriter -ptl_handle_any_t -\family default -, can be used when a generic handle is needed. - Every handle value can be converted into a value of type -\family typewriter -ptl_handle_any_t -\family default - without loss of information. -\layout Standard - -Handles are not simple values. - Every portals object is associated with a specific network interface and - an identifier for this interface (along with an object identifier) is part - of the handle for the object. -\layout Standard - -The special value -\family typewriter -PTL_EQ_NONE -\family default -, of type -\family typewriter -ptl_handle_eq_t -\family default -, is used to indicate the absence of an event queue. 
- See sections -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - - and\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - - for uses of this value. -\layout Subsection - -Indexes -\begin_inset LatexCommand \label{sec:index-type} - -\end_inset - - -\layout Standard - -The types -\family typewriter -ptl_pt_index_t -\family default - and -\family typewriter -ptl_ac_index_t -\family default - are integral types used for representing Portal table indexes and access - control table indexes, respectively. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - - for limits on values of these types. -\layout Subsection - -Match Bits -\begin_inset LatexCommand \label{sec:mb-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_match_bits_t -\family default - is capable of holding unsigned 64-bit integer values. -\layout Subsection - -Network Interfaces -\begin_inset LatexCommand \label{sec:ni-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_interface_t -\family default - is an integral type used for identifying different network interfaces. - Users will need to consult the local documentation to determine appropriate - values for the interfaces available. - The special value -\family typewriter -PTL_IFACE_DEFAULT -\family default - identifies the default interface. -\layout Subsection - -Identifiers -\begin_inset LatexCommand \label{sec:id-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_nid_t -\family default - is an integral type used for representing node ids -\family typewriter -, ptl_pid_t -\family default - is an integral type for representing process ids, and -\family typewriter -ptl_uid_t -\family default - is an integral type for representing user ids.
-\layout Standard - -The special values -\family typewriter -PTL_PID_ANY -\family default - matches any process identifier, PTL_NID_ANY matches any node identifier, - and -\family typewriter -PTL_UID_ANY -\family default - matches any user identifier. - See sections -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - and\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - for uses of these values. -\layout Subsection - -Status Registers -\begin_inset LatexCommand \label{sec:stat-type} - -\end_inset - - -\layout Standard - -Each network interface maintains an array of status registers that can be - accessed using the -\family typewriter -PtlNIStatus -\family default - function (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - -). - The type -\family typewriter -ptl_sr_index_t -\family default - defines the types of indexes that can be used to access the status registers. - The only index defined for all implementations is -\family typewriter -PTL_SR_DROP_COUNT -\family default - which identifies the status register that counts the dropped requests for - the interface. - Other indexes (and registers) may be defined by the implementation. -\layout Standard - -The type -\family typewriter -ptl_sr_value_t -\family default - defines the types of values held in status registers. - This is a signed integer type. - The size is implementation dependent, but must be at least 32 bits. -\layout Section - -Initialization and Cleanup -\begin_inset LatexCommand \label{sec:init} - -\end_inset - - -\layout Standard - -The Portals API includes a function, -\emph on -PtlInit -\emph default -, to initialize the library and a function, -\emph on -PtlFini -\emph default -, to cleanup after the application is done using the library. 
-\layout Subsection - -PtlInit -\begin_inset LatexCommand \label{sec:ptlinit} - -\end_inset - - -\layout LyX-Code - -int PtlInit( int *max_interfaces ); -\layout Standard -\noindent -The -\emph on -PtlInit -\emph default - function initializes the Portals library. - PtlInit must be called at least once by a process before any thread makes - a Portals function call, but may be safely called more than once. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_FAIL Indicates an error during initialization. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -max_interfaces -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -max_interfaces -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the maximum number of interfaces - that can be initialized. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlFini -\begin_inset LatexCommand \label{sec:ptlfini} - -\end_inset - - -\layout LyX-Code - -void PtlFini( void ); -\layout Standard -\noindent -The -\emph on -PtlFini -\emph default - function cleans up after the Portals library is no longer needed by a process. 
- After this function is called, calls to any of the functions defined by - the Portal API or use of the structures set up by the Portals API will - result in undefined behavior. - This function should be called once and only once during termination by - a process. - Typically, this function will be called in the exit sequence of a process. - Individual threads should not call PtlFini when they terminate. -\layout Section - -Network Interfaces -\begin_inset LatexCommand \label{sec:ni} - -\end_inset - - -\layout Standard - -The Portals API supports the use of multiple network interfaces. - However, each interface is treated as an independent entity. - Combining interfaces (e.g., -\begin_inset Quotes eld -\end_inset - -bonding -\begin_inset Quotes erd -\end_inset - - to create a higher bandwidth connection) must be implemented by the application - or embedded in the underlying network. - Interfaces are treated as independent entities to make it easier to cache - information on individual network interface cards. -\layout Standard - -Once initialized, each interface provides a Portal table, an access control - table, and a collection of status registers. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - for a discussion of updating Portal table entries using the -\emph on -PtlMEAttach -\emph default - function. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ac} - -\end_inset - - for a discussion of the initialization and updating of entries in the access - control table. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - - for a discussion of the -\emph on -PtlNIStatus -\emph default - function which can be used to determine the value of a status register. -\layout Standard - -Every other type of Portal object (e.g., memory descriptor, event queue, or - match list entry) is associated with a specific network interface. 
- The association to a network interface is established when the object is - created and is encoded in the handle for the object. -\layout Standard - -Each network interface is initialized and shut down independently. - The initialization routine, -\emph on -PtlNIInit -\emph default -, returns a handle for an interface object which is used in all subsequent - Portal operations. - The -\emph on -PtlNIFini -\emph default - function is used to shut down an interface and release any resources that - are associated with the interface. - Network interface handles are associated with processes, not threads. - All threads in a process share all of the network interface handles. -\layout Standard - -The Portals API also defines the -\emph on -PtlNIStatus -\emph default - function to query the status registers for a network interface, the -\emph on -PtlNIDist -\emph default - function to determine the -\begin_inset Quotes eld -\end_inset - -distance -\begin_inset Quotes erd -\end_inset - - to another process, and the -\emph on -PtlNIHandle -\emph default - function to determine the network interface that an object is associated - with. -\layout Subsection - -PtlNIInit -\begin_inset LatexCommand \label{sec:niinit} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - int max_match_entries; -\newline - int max_mem_descriptors; -\newline - int max_event_queues; -\newline - ptl_ac_index_t max_atable_index; -\newline - ptl_pt_index_t max_ptable_index; -\newline -} ptl_ni_limits_t; -\newline - -\newline -int PtlNIInit( ptl_interface_t interface, -\newline - ptl_pid_t pid, -\newline - ptl_ni_limits_t* desired, -\newline - ptl_ni_limits_t* actual, -\newline - ptl_handle_ni_t* handle ); -\layout Standard - -Values of type -\family typewriter -ptl_ni_limits_t -\family default - include the following members: -\layout Description - -max_match_entries Maximum number of match entries that can be allocated - at any one time. 
-\layout Description - -max_mem_descriptors Maximum number of memory descriptors that can be allocated - at any one time. -\layout Description - -max_event_queues Maximum number of event queues that can be allocated at - any one time. -\layout Description - -max_atable_index Largest access control table index for this interface; - valid indexes range from zero to -\family typewriter -max_atable_index -\family default -, inclusive. -\layout Description - -max_ptable_index Largest Portal table index for this interface; valid indexes - range from zero to -\family typewriter -max_ptable_index -\family default -, inclusive. -\layout Standard -\noindent -The -\emph on -PtlNIInit -\emph default - function is used to initialize the Portals API for a network interface. - This function must be called at least once by each process before any other - operations that apply to the interface by any process or thread. - For subsequent calls to -\shape italic -PtlNIInit -\shape default - from within the same process (either by different threads or the same thread), - the desired limits will be ignored and the call will return the existing - NI handle. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INIT_DUP Indicates a duplicate initialization of -\family typewriter -interface -\family default -. - -\layout Description - -PTL_INIT_INV Indicates that -\family typewriter -interface -\family default - is not a valid network interface. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to initialize the - interface. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -pid -\family default - is not a valid process id. 
-\layout Description - -PTL_SEGV Indicates that -\family typewriter -actual -\family default -or -\family typewriter - handle -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the network interface to be initialized. - (See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - for a discussion of values used to identify network interfaces.) -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -pid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the desired process id (for well known process ids). - The value -\family typewriter -PTL_PID_ANY -\family default - may be used to have the process id assigned by the underlying library. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -desired -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If non-NULL, points to a structure that holds the desired limits. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -actual -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, the location pointed to by actual will hold the actual - limits. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the interface. 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -The use of desired is implementation dependent. - In particular, an implementation may choose to ignore this argument. -\layout Subsection - -PtlNIFini -\begin_inset LatexCommand \label{sec:nifini} - -\end_inset - - -\layout LyX-Code - -int PtlNIFini( ptl_handle_ni_t interface ); -\layout Standard -\noindent -The -\emph on -PtlNIFini -\emph default - function is used to release the resources allocated for a network interface. - Once the -\emph on -PtlNIFini -\emph default - operation has been started, the results of pending API operations (e.g., - operations initiated by another thread) for this interface are undefined. - Similarly, the effects of incoming operations (puts and gets) or return - values (acknowledgements and replies) for this interface are undefined. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the interface to shutdown. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlNIStatus -\begin_inset LatexCommand \label{sec:nistatus} - -\end_inset - - -\layout LyX-Code - -int PtlNIStatus( ptl_handle_ni_t interface, -\newline - ptl_sr_index_t status_register, -\newline - ptl_sr_value_t* status ); -\layout Standard -\noindent -The -\emph on -PtlNIStatus -\emph default - function returns the value of a status register for the specified interface. - (See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - for more information on status register indexes and status register values.) -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_INV_SR_INDX Indicates that -\family typewriter -status_register -\family default - is not a valid status register. 
- -\layout Description - -PTL_SEGV Indicates that -\family typewriter -status -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -status_register -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -An index for the status register to read. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -status -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the current value of the status - register. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -The only status register that must be defined is a drop count register ( -\family typewriter -PTL_SR_DROP_COUNT -\family default -). - Implementations may define additional status registers. - Identifiers for the indexes associated with these registers should start - with the prefix -\family typewriter -PTL_SR_ -\family default -. -\layout Subsection - -PtlNIDist -\layout LyX-Code - -int PtlNIDist( ptl_handle_ni_t interface, -\newline - ptl_process_id_t process, -\newline - unsigned long* distance ); -\layout Standard -\noindent -The -\emph on -PtlNIDist -\emph default - function returns the distance to another process using the specified interface. - Distances are only defined relative to an interface. - Distance comparisons between different interfaces on the same process may - be meaningless. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. 
- -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -process -\family default - is not a valid process identifier. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -distance -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -process -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -An identifier for the process whose distance is being requested. 
- -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -distance -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the distance to the remote - process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -This function should return a static measure of distance. - Examples include minimum latency, the inverse of available bandwidth, or - the number of switches between the two endpoints. -\layout Subsection - -PtlNIHandle -\layout LyX-Code - -int PtlNIHandle( ptl_handle_any_t handle, -\newline - ptl_handle_ni_t* interface ); -\layout Standard -\noindent -The -\emph on -PtlNIHandle -\emph default - function returns a handle for the network interface with which the object - identified by -\family typewriter -handle -\family default - is associated. - If the object identified by -\family typewriter -handle -\family default - is a network interface, this function returns the same value it is passed. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_HANDLE Indicates that -\family typewriter -handle -\family default - is not a valid handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -interface -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the object. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the network interface - associated with -\family typewriter -handle -\family default -. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Every handle should encode the network interface and the object id relative - to this handle. - Both are presumably encoded using integer values. -\layout Section - -User Identification -\begin_inset LatexCommand \label{sec:uid} - -\end_inset - - -\layout Standard - -Every process runs on behalf of a user. 
- -\layout Subsection - -PtlGetUid -\layout LyX-Code - -int PtlGetUid( ptl_handle_ni_t ni_handle, -\newline - ptl_uid_t* uid ); -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -ni_handle -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -uid -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ni_handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A network interface handle. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -uid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the user id for the calling - process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Note that user identifiers are dependent on the network interface(s). - In particular, if a node has multiple interfaces, a process may have multiple - user identifiers. -\layout Section - -Process Identification -\begin_inset LatexCommand \label{sec:pid} - -\end_inset - - -\layout Standard - -Processes that use the Portals API can be identified using a node id and - process id. - Every node accessible through a network interface has a unique node identifier - and every process running on a node has a unique process identifier. - As such, any process in the computing system can be identified by its node - id and process id. - -\layout Standard - -The Portals API defines a type, -\family typewriter -ptl_process_id_t -\family default - for representing process ids and a function, -\emph on -PtlGetId -\emph default -, which can be used to obtain the id of the current process. -\layout Comment - -The Portals API does not include thread identifiers. - Messages are delivered to processes (address spaces) not threads (contexts - of execution). 
-\layout Subsection - -The Process Id Type -\begin_inset LatexCommand \label{sec:pid-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - ptl_nid_t nid; /* node id */ -\newline - ptl_pid_t pid; /* process id */ -\newline -} ptl_process_id_t; -\layout Standard -\noindent -The -\family typewriter -ptl_process_id_t -\family default - type uses two identifiers to represent a process id: a node id and a process - id. - -\layout Subsection - -PtlGetId -\begin_inset LatexCommand \label{sub:PtlGetId} - -\end_inset - - -\layout LyX-Code - -int PtlGetId( ptl_handle_ni_t ni_handle, -\newline - ptl_process_id_t* id ); -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -ni_handle -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -id -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ni_handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A network interface handle. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -id -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the id for the calling process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Note that process identifiers are dependent on the network interface(s). - In particular, if a node has multiple interfaces, it may have multiple - node identifiers. -\layout Section - -Match List Entries and Match Lists -\begin_inset LatexCommand \label{sec:me} - -\end_inset - - -\layout Standard - -A match list is a chain of match list entries. - Each match list entry includes a memory descriptor and a set of match criteria. - The match criteria can be used to reject incoming requests based on process - id or the match bits provided in the request. - A match list is created using the -\emph on -PtlMEAttach -\emph default - or -\shape italic -PtlMEAttachAny -\shape default - functions, which create a match list consisting of a single match list - entry, attach the match list to the specified Portal index, and return - a handle for the match list entry. - Match entries can be dynamically inserted and removed from a match list - using the -\emph on -PtlMEInsert -\emph default - and -\emph on -PtlMEUnlink -\emph default - functions. 
-\layout Subsection - -PtlMEAttach -\begin_inset LatexCommand \label{sec:meattach} - -\end_inset - - -\layout LyX-Code - -typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t; -\newline - -\layout LyX-Code - -typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t; -\newline - -\layout LyX-Code - -int PtlMEAttach( ptl_handle_ni_t interface, -\newline - ptl_pt_index_t index, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_unlink_t unlink, -\newline - ptl_ins_pos_t position, -\newline - ptl_handle_me_t* handle ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_ins_pos_t -\family default - are used to control where a new item is inserted. - The value -\family typewriter -PTL_INS_BEFORE -\family default - is used to insert the new item before the current item or before the head - of the list. - The value -\family typewriter -PTL_INS_AFTER -\family default - is used to insert the new item after the current item or after the last - item in the list. - -\layout Standard - -The -\emph on -PtlMEAttach -\emph default - function creates a match list consisting of a single entry and attaches - this list to the Portal table for -\family typewriter -interface -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PTINDEX Indicates that -\family typewriter -index -\family default - is not a valid Portal table index. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. 
- -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match list entry. - -\layout Description - -PTL_ML_TOOLONG Indicates that the resulting match list is too long. - The maximum length for a match list is defined by the interface. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="7" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.75in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The Portal table index where the match list should be attached. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Specifies the match criteria for the process id of the requestor. - The constants -\family typewriter -PTL_PID_ANY -\family default - and -\family typewriter -PTL_NID_ANY -\family default - can be used to wildcard either of the ids in the -\family typewriter -ptl_process_id_t -\family default - structure. - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -match_bits, ignorebits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Specify the match criteria to apply to the match bits in the incoming request. - The -\family typewriter -ignorebits -\family default - are used to mask out insignificant bits in the incoming match bits. - The resulting bits are then compared to the match list entry's match - bits to determine if the incoming request meets the match criteria. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -unlink -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates the match list entry should be unlinked when the last memory descripto -r associated with this match list entry is unlinked. - (Note, the check for unlinking a match entry only occurs when a memory - descriptor is unlinked.) -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -position -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates whether the new match entry should be prepended or appended to - the existing match list. - If there is no existing list, this argument is ignored and the new match - entry becomes the only entry in the list. - Allowed constants: -\family typewriter -PTL_INS_BEFORE -\family default -, -\family typewriter -PTL_INS_AFTER -\family default -. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - match list entry. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEAttachAny -\begin_inset LatexCommand \label{sec:attachany} - -\end_inset - - -\layout LyX-Code - -int PtlMEAttachAny( ptl_handle_ni_t interface, -\newline - ptl_pt_index_t *index, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_unlink_t unlink, -\newline - ptl_handle_me_t* handle ); -\layout Standard - -The -\emph on -PtlMEAttachAny -\emph default - function creates a match list consisting of a single entry and attaches - this list to an unused Portal table entry for -\family typewriter -interface -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match list entry. 
- -\layout Description - -PTL_PT_FULL Indicates that there are no free entries in the Portal table. -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.75in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the Portal index where the - match list has been attached. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid, match_bits, ignorebits, unlink -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\shape italic -PtlMEAttach -\shape default -. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - match list entry. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEInsert -\begin_inset LatexCommand \label{sec:meinsert} - -\end_inset - - -\layout LyX-Code - -int PtlMEInsert( ptl_handle_me_t current, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_ins_pos_t position, -\newline - ptl_handle_me_t* handle ); -\layout Standard - -The -\emph on -PtlMEInsert -\emph default - function creates a new match list entry and inserts this entry into the - match list containing -\family typewriter -current -\family default -. 
-\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -current -\family default - is not a valid match entry handle. - -\layout Description - -PTL_ML_TOOLONG Indicates that the resulting match list is too long. - The maximum length for a match list is defined by the interface. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match entry. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -current -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for a match entry. - The new match entry will be inserted immediately before or immediately - after this match entry. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid -\family default -, -\family typewriter -match_bits -\family default -, -\family typewriter -ignorebits -\family default -, -\family typewriter -unlink -\family default - -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\emph on -PtlMEAttach -\emph default - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -position -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates whether the new match entry should be inserted before or after - the -\family typewriter -current -\family default - entry. - Allowed constants: -\family typewriter -PTL_INS_BEFORE -\family default -, -\family typewriter -PTL_INS_AFTER -\family default -. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\emph on -PtlMEAttach -\emph default -. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEUnlink -\begin_inset LatexCommand \label{sec:meunlink} - -\end_inset - - -\layout LyX-Code - -int PtlMEUnlink( ptl_handle_me_t entry ); -\layout Standard -\noindent -The -\emph on -PtlMEUnlink -\emph default - function can be used to unlink a match entry from a match list. - This operation also releases any resources associated with the match entry - (including the associated memory descriptor). - It is an error to use the match entry handle after calling -\emph on -PtlMEUnlink -\emph default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -entry -\family default - is not a valid match entry handle. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -entry -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the match entry to be unlinked. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Memory Descriptors -\begin_inset LatexCommand \label{sec:md} - -\end_inset - - -\layout Standard - -A memory descriptor contains information about a region of an application - process' memory and an event queue where information about the operations - performed on the memory descriptor are recorded. - The Portals API provides two operations to create memory descriptors: -\emph on -PtlMDAttach -\emph default -, and -\emph on -PtlMDBind -\emph default -; an operation to update a memory descriptor, -\emph on -PtlMDUpdate -\emph default -; and an operation to unlink and release the resources associated with a - memory descriptor, -\emph on -PtlMDUnlink -\emph default -. 
-\layout Subsection - -The Memory Descriptor Type -\begin_inset LatexCommand \label{sec:md-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - void* start; -\newline - ptl_size_t length; -\newline - int threshold; -\newline - unsigned int max_offset; -\newline - unsigned int options; -\newline - void* user_ptr; -\newline - ptl_handle_eq_t eventq; -\newline -} ptl_md_t; -\layout Standard -\noindent -The -\family typewriter -ptl_md_t -\family default - type defines the application view of a memory descriptor. - Values of this type are used to initialize and update the memory descriptors. -\layout Subsubsection - -Members -\layout Description - -start,\SpecialChar ~ -length Specify the memory region associated with the memory descriptor. - The -\family typewriter -start -\family default - member specifies the starting address for the memory region and the -\family typewriter -length -\family default - member specifies the length of the region. - The -\family typewriter -start member -\family default - can be NULL provided that the -\family typewriter -length -\family default - member is zero. - (Zero length buffers are useful to record events.) There are no alignment - restrictions on the starting address or the length of the region; although, - unaligned messages may be slower (i.e., lower bandwidth and/or longer latency) - on some implementations. - -\layout Description - -threshold Specifies the maximum number of operations that can be performed - on the memory descriptor. - An operation is any action that could possibly generate an event (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - for the different types of events). - In the usual case, the threshold value is decremented for each operation - on the memory descriptor. - When the threshold value is zero, the memory descriptor is -\emph on -inactive -\emph default -, and does not respond to operations. 
- A memory descriptor can have an initial threshold value of zero to allow - for manipulation of an inactive memory descriptor by the local process. - A threshold value of -\family typewriter -PTL_MD_THRESH_INF -\family default - indicates that there is no bound on the number of operations that may be - applied to a memory descriptor. - Note that local operations (e.g., -\emph on -PtlMDUpdate -\emph default -) are not applied to the threshold count. - -\layout Description - -max_offset Specifies the maximum local offset of a memory descriptor. - When the local offset of a memory descriptor exceeds this maximum, the - memory descriptor becomes -\shape italic -inactive -\shape default - and does not respond to further operations. -\layout Description - -options Specifies the behavior of the memory descriptor. - There are five options that can be selected: enable put operations (yes - or no), enable get operations (yes or no), offset management (local or - remote), message truncation (yes or no), and acknowledgement (yes or no). - Values for this argument can be constructed using a bitwise or of the following - values: -\begin_deeper -\begin_deeper -\layout Description - -PTL_MD_OP_PUT Specifies that the memory descriptor will respond to -\emph on -put -\emph default - operations. - By default, memory descriptors reject -\emph on -put -\emph default - operations. - -\layout Description - -PTL_MD_OP_GET Specifies that the memory descriptor will respond to -\emph on -get -\emph default - operations. - By default, memory descriptors reject -\emph on -get -\emph default - operations. - -\layout Description - -PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory - region is provided by the incoming request. - By default, the offset is maintained locally. - When the offset is maintained locally, the offset is incremented by the - length of the request so that the next operation (put and/or get) will - access the next part of the memory region. 
-\layout Description - -PTL_MD_TRUNCATE Specifies that the length provided in the incoming request - can be reduced to match the memory available in the region. - (The memory available in a memory region is determined by subtracting the - offset from the length of the memory region.) By default, if the length - in the incoming operation is greater than the amount of memory available, - the operation is rejected. - -\layout Description - -PTL_MD_ACK_DISABLE Specifies that an acknowledgement should -\emph on -not -\emph default - be sent for incoming -\emph on -put -\emph default - operations, even if requested. - By default, acknowledgements are sent for -\emph on -put -\emph default - operations that request an acknowledgement. - Acknowledgements are never sent for -\emph on -get -\emph default - operations. - The value sent in the reply serves as an implicit acknowledgement. - -\end_deeper -\layout Standard - - -\series bold -Note -\series default -: It is not considered an error to have a memory descriptor that does not - respond to either -\emph on -put -\emph default - or -\emph on -get -\emph default - operations: Every memory descriptor responds to -\emph on -reply -\emph default - operations. - Nor is it considered an error to have a memory descriptor that responds - to both -\emph on -put -\emph default - and -\emph on -get -\emph default - operations. - -\end_deeper -\layout Description - -user_ptr A user-specified value that is associated with the memory descriptor. - The value does not need to be a pointer, but must fit in the space used - by a pointer. - This value (along with other values) is recorded in events associated with - operations on this memory descriptor. 
-\begin_inset Foot -collapsed true - -\layout Standard - -Tying the memory descriptor to a user-defined value can be useful when multiple - memory descriptors share the same event queue or when the memory descriptor - needs to be associated with a data structure maintained by the application. - For example, an MPI implementation can set the -\family typewriter -user_ptr -\family default - argument to the value of an MPI Request. - This direct association allows for processing of memory descriptors by - the MPI implementation without a table lookup or a search for the appropriate - MPI Request. -\end_inset - - -\layout Description - -eventq A handle for the event queue used to log the operations performed - on the memory region. - If this argument is -\family typewriter -PTL_EQ_NONE -\family default -, operations performed on this memory descriptor are not logged. - -\layout Subsection - -PtlMDAttach -\begin_inset LatexCommand \label{sec:mdattach} - -\end_inset - - -\layout LyX-Code - -int PtlMDAttach( ptl_handle_me_t match, -\newline - ptl_md_t mem_desc, -\newline - ptl_unlink_t unlink_op, -\newline - ptl_unlink_t unlink_nofit, -\newline - ptl_handle_md_t* handle ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_unlink_t -\family default - are used to control whether an item is unlinked from a list. - The value -\family typewriter -PTL_UNLINK -\family default - enables unlinking. - The value -\family typewriter -PTL_RETAIN -\family default - disables unlinking. -\layout Standard - -The -\emph on -PtlMDAttach -\emph default - operation is used to create a memory descriptor and attach it to a match - list entry. - An error code is returned if this match list entry already has an associated - memory descriptor. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. 
- -\layout Description - -PTL_INUSE Indicates that -\family typewriter -match -\family default - already has a memory descriptor attached. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -match -\family default - is not a valid match entry handle. - -\layout Description - -PTL_ILL_MD Indicates that -\family typewriter -mem_desc -\family default - is not a legal memory descriptor. - This may happen because the memory region defined in -\family typewriter -mem_desc -\family default - is invalid or because the network interface associated with the -\family typewriter -eventq -\family default - in -\family typewriter -mem_desc -\family default - is not the same as the network interface associated with -\family typewriter -match -\family default -. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - memory descriptor. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the match entry that the memory descriptor will be associated - with. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Provides initial values for the application visible parts of a memory descriptor. - Other than its use for initialization, there is no linkage between this - structure and the memory descriptor maintained by the API. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -unlink_op -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A flag to indicate whether the memory descriptor is unlinked when it becomes - inactive, either because the operation threshold drops to zero or because - the maximum offset has been exceeded. - (Note, the check for unlinking a memory descriptor only occurs after - the completion of a successful operation. - If the threshold is set to zero during initialization or using -\emph on -PtlMDUpdate -\emph default -, the memory descriptor is -\series bold -not -\series default - unlinked.) 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -unlink_nofit -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A flag to indicate whether the memory descriptor is unlinked when the space - remaining in the memory descriptor is not sufficient for a matching operation. - If an incoming message arrives at a memory descriptor that does - not have sufficient space and the -\series bold -PTL_MD_TRUNCATE -\series default - operation is not specified, the memory descriptor will be unlinked. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - memory descriptor. - The -\family typewriter -handle -\family default - argument can be NULL, in which case the handle will not be returned. 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDBind -\begin_inset LatexCommand \label{sec:mdbind} - -\end_inset - - -\layout LyX-Code - -int PtlMDBind( ptl_handle_ni_t interface, -\newline - ptl_md_t mem_desc, -\newline - ptl_handle_md_t* handle ); -\layout Standard -\noindent -The -\emph on -PtlMDBind -\emph default - operation is used to create a -\begin_inset Quotes eld -\end_inset - -free floating -\begin_inset Quotes erd -\end_inset - - memory descriptor, i.e., a memory descriptor that is not associated with - a match list entry. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_ILL_MD Indicates that -\family typewriter -mem_desc -\family default - is not a legal memory descriptor. - This may happen because the memory region defined in -\family typewriter -mem_desc -\family default - is invalid or because the network interface associated with the -\family typewriter -eventq -\family default - in -\family typewriter -mem_desc -\family default - is not the same as the network interface, -\family typewriter -interface -\family default -. - -\layout Description - -PTL_INV_EQ Indicates that the event queue associated with -\family typewriter -mem_desc -\family default - is not valid. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - memory descriptor. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -handle -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the network interface with which the memory descriptor will - be associated. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Provides initial values for the application visible parts of a memory descriptor. - Other than its use for initialization, there is no linkage between this - structure and the memory descriptor maintained by the API. 
- -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - memory descriptor. - The -\family typewriter -handle -\family default - argument must be a valid address and cannot be NULL. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDUnlink -\begin_inset LatexCommand \label{sec:mdfree} - -\end_inset - - -\layout LyX-Code - -int PtlMDUnlink( ptl_handle_md_t mem_desc ); -\layout Standard -\noindent -The -\emph on -PtlMDUnlink -\emph default - function unlinks the memory descriptor from any match list entry it may - be linked to and releases the resources associated with a memory descriptor. - (This function does not free the memory region associated with the memory - descriptor.) This function also releases the resources associated with a - floating memory descriptor. - Only memory descriptors with no pending operations may be unlinked. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor handle. -\layout Description - -PTL_MD_INUSE Indicates that -\family typewriter -mem_desc -\family default - has pending operations and cannot be unlinked. 
-\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor to be released. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDUpdate -\begin_inset LatexCommand \label{sec:mdupdate} - -\end_inset - - -\layout LyX-Code - -int PtlMDUpdate( ptl_handle_md_t mem_desc, -\newline - ptl_md_t* old_md, -\newline - ptl_md_t* new_md, -\newline - ptl_handle_eq_t testq ); -\layout Standard -\noindent -The -\emph on -PtlMDUpdate -\emph default - function provides a conditional, atomic update operation for memory descriptors. - The memory descriptor identified by -\family typewriter -mem_desc -\family default - is only updated if the event queue identified by -\family typewriter -testq -\family default - is empty. - The intent is to only enable updates to the memory descriptor when no new - messages have arrived since the last time the queue was checked. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:exmpi} - -\end_inset - - for an example of how this function can be used. 
-\layout Standard - -If -\family typewriter -new_md -\family default - is not NULL, the memory descriptor identified by -\family typewriter -mem_desc -\family default - will be updated - to reflect the values in the structure pointed to by -\family typewriter -new_md -\family default - if -\family typewriter -testq -\family default - has the value -\family typewriter -PTL_EQ_NONE -\family default - or if the event queue identified by -\family typewriter -testq -\family default - is empty. - If -\family typewriter -old_md -\family default - is not NULL, the current value of the memory descriptor identified by -\family typewriter -mem_desc -\family default - is recorded in the location identified by -\family typewriter -old_md -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_NOUPDATE Indicates that the update was not performed because -\family typewriter -testq -\family default - was not empty. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor handle. - -\layout Description - -PTL_ILL_MD Indicates that the value pointed to by -\family typewriter -new_md -\family default - is not a legal memory descriptor (e.g., the memory region specified by the - memory descriptor may be invalid). - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -testq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -new_md -\family default - or -\family typewriter -old_md -\family default - is not a legal address.
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor to update. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -old_md -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If -\family typewriter -old_md -\family default - is not the value -\family typewriter -NULL -\family default -, the current value of the memory descriptor will be stored in the location - identified by -\family typewriter -old_md -\family default -.
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -new_md -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If -\family typewriter -new_md -\family default - is not the value -\family typewriter -NULL -\family default -, this argument provides the new values for the memory descriptor, if the - update is performed. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -testq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for an event queue used to predicate the update. - If -\family typewriter -testq -\family default - is equal to -\family typewriter -PTL_EQ_NONE -\family default -, the update is performed unconditionally. - Otherwise, the update is performed if and only if -\family typewriter -testq -\family default - is empty. - If the update is not performed, the function returns the value -\family typewriter -PTL_NOUPDATE -\family default -. - (Note, the -\family typewriter -testq -\family default - argument does not need to be the same as the event queue associated with - the memory descriptor.) 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Standard - -The conditional update can be used to ensure that the memory descriptor - has not changed between the time it was examined and the time it is updated. - In particular, it is needed to support an MPI implementation where the - activity of searching an unexpected message queue and posting a receive - must be atomic. -\layout Section - -Events and Event Queues -\begin_inset LatexCommand \label{sec:eq} - -\end_inset - - -\layout Standard - -Event queues are used to log operations performed on memory descriptors. - They can also be used to hold acknowledgements for completed -\emph on -put -\emph default - operations and to note when the data specified in a -\emph on -put -\emph default - operation has been sent (i.e., when it is safe to reuse the buffer that holds - this data). - Multiple memory descriptors can share a single event queue. -\layout Standard - -In addition to the -\family typewriter -ptl_handle_eq_t -\family default - type, the Portals API defines two types associated with events: The -\family typewriter - -\newline -ptl_event_kind_t -\family default - type defines the kinds of events that can be stored in an event queue. - The -\family typewriter -ptl_event_t -\family default - type defines a structure that holds the information associated with an - event. -\layout Standard - -The Portals API also provides four functions for dealing with event queues: - The -\emph on -PtlEQAlloc -\emph default - function is used to allocate the API resources needed for an event queue, - the -\emph on -PtlEQFree -\emph default - function is used to release these resources, the -\emph on -PtlEQGet -\emph default - function can be used to get the next event from an event queue, and the - -\emph on -PtlEQWait -\emph default - function can be used to block a process (or thread) until an event queue - has at least one event. 
-\layout Subsection - -Kinds of Events -\begin_inset LatexCommand \label{sec:ek-type} - -\end_inset - - -\layout LyX-Code - -typedef enum { -\newline - PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL, -\newline - PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL, -\newline - PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL, -\newline - PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL, -\newline - PTL_EVENT_ACK, -\newline - PTL_EVENT_UNLINK -\newline -} ptl_event_kind_t; -\layout Standard -\noindent -The Portals API defines fourteen types of events that can be logged in an - event queue: -\layout Description - -PTL_EVENT_GET_START A remote -\emph on -get -\emph default - operation has been started on the memory descriptor. - The memory region associated with this descriptor should not be altered - until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_GET_END A previously initiated -\emph on -get -\emph default - operation completed successfully. - This event is logged after the reply has been sent by the local node. - As such, the process could free the memory descriptor once it sees this - event. - -\layout Description - -PTL_EVENT_GET_FAIL A previously initiated -\emph on -get -\emph default - operation completed unsuccessfully. - This event is logged after the reply has been sent by the local node. - As such, the process could free the memory descriptor once it sees this - event. - -\layout Description - -PTL_EVENT_PUT_START A remote -\emph on -put -\emph default - operation has been started on the memory descriptor. - The memory region associated with this descriptor should be considered - volatile until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_PUT_END A previously initiated -\emph on -put -\emph default - operation completed successfully.
- The underlying layers will not alter the memory (on behalf of this operation) - once this event has been logged. - -\layout Description - -PTL_EVENT_PUT_FAIL A previously initiated -\emph on -put -\emph default - operation completed unsuccessfully. - The underlying layers will not alter the memory (on behalf of this operation) - once this event has been logged. - -\layout Description - -PTL_EVENT_REPLY_START A -\emph on -reply -\emph default - operation has been started on the memory descriptor. - -\layout Description - -PTL_EVENT_REPLY_END A previously initiated -\emph on -reply -\emph default - operation has completed successfully. - This event is logged after the data (if any) from the reply has been written - into the memory descriptor. - -\layout Description - -PTL_EVENT_REPLY_FAIL A previously initiated -\emph on -reply -\emph default - operation has completed unsuccessfully. - This event is logged after the data (if any) from the reply has been written - into the memory descriptor. - -\layout Description - -PTL_EVENT_ACK An -\emph on -acknowledgement -\emph default - was received. - This event is logged when the acknowledgement is received. -\layout Description - -PTL_EVENT_SEND_START An outgoing -\emph on -send -\emph default - operation has been started. - The memory region associated with this descriptor should not be altered - until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_SEND_END A previously initiated -\emph on -send -\emph default - operation has completed successfully. - This event is logged after the entire buffer has been sent and it is safe - for the application to reuse the buffer. - -\layout Description - -PTL_EVENT_SEND_FAIL A previously initiated -\emph on -send -\emph default - operation has completed unsuccessfully. - The process can safely manipulate the memory or free the memory descriptor - once it sees this event.
-\layout Description - -PTL_EVENT_UNLINK A memory descriptor associated with this event queue has - been automatically unlinked. - This event is not generated when a memory descriptor is explicitly unlinked - by calling -\shape italic -PtlMDUnlink -\shape default -. - This event does not decrement the threshold count. -\layout Subsection - -Event Ordering -\layout Standard - -The Portals API guarantees that when a process initiates two operations - on a remote process, the operations will be initiated on the remote process - in the same order that they were initiated on the original process. - As an example, if process A initiates two -\emph on -put -\emph default - operations, -\emph on -x -\emph default - and -\emph on -y -\emph default -, on process B, the Portals API guarantees that process A will receive the - -\family typewriter -PTL_EVENT_SEND_START -\family default - events for -\emph on -x -\emph default - and -\emph on -y -\emph default - in the same order that process B receives the -\family typewriter -PTL_EVENT_PUT_START -\family default - events for -\emph on -x -\emph default - and -\emph on -y -\emph default -. - Notice that the API does not guarantee that the start events will be delivered - in the same order that process A initiated the -\emph on -x -\emph default - and -\emph on -y -\emph default - operations. - If process A needs to ensure the ordering of these operations, it should - include code to wait for the initiation of -\emph on -x -\emph default - before it initiates -\emph on -y -\emph default -. -\layout Subsection - -Failure Notification -\layout Standard - -Operations may fail to complete successfully; however, unless the node itself - fails, every operation that is started will eventually complete. - While an operation is in progress, the memory associated with the operation - should not be viewed (in the case of a put or a reply) or altered (in the - case of a send or get).
- Operation completion, whether successful or unsuccessful, is final. - That is, when an operation completes, the memory associated with the operation - will no longer be read or altered by the operation. - A network interface can use the -\family typewriter -ptl_ni_fail_t -\family default - to define more specific information regarding the failure of the operation - and record this information in the -\family typewriter -ni_fail_type -\family default - field of the event. -\layout Subsection - -The Event Type -\begin_inset LatexCommand \label{sec:event-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - ptl_event_kind_t type; -\newline - ptl_process_id_t initiator; -\newline - ptl_uid_t uid; -\layout LyX-Code - - ptl_pt_index_t portal; -\newline - ptl_match_bits_t match_bits; -\newline - ptl_size_t rlength; -\newline - ptl_size_t mlength; -\newline - ptl_size_t offset; -\newline - ptl_handle_md_t md_handle; -\newline - ptl_md_t mem_desc; -\newline - ptl_hdr_data_t hdr_data; -\newline - ptl_seq_t link; -\newline - ptl_ni_fail_t ni_fail_type; -\newline - volatile ptl_seq_t sequence; -\newline -} ptl_event_t; -\layout Standard -\noindent -An event structure includes the following members: -\layout Description - -type Indicates the type of the event. - -\layout Description - -initiator The id of the initiator. - -\layout Description - -portal The Portal table index specified in the request. - -\layout Description - -match_bits A copy of the match bits specified in the request. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - for more information on match bits. - -\layout Description - -rlength The length (in bytes) specified in the request. - -\layout Description - -mlength The length (in bytes) of the data that was manipulated by the operation. - For truncated operations, the manipulated length will be the number of - bytes specified by the memory descriptor (possibly with an offset) operation. 
- For all other operations, the manipulated length will be the length of - the requested operation. - -\layout Description - -offset Is the displacement (in bytes) into the memory region that the operation - used. - The offset can be determined by the operation (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - -) for a remote managed memory descriptor, or by the local memory descriptor - (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -). - -\layout Description - -md_handle Is the handle to the memory descriptor associated with the event. -\layout Description - -mem_desc Is the state of the memory descriptor immediately after the event - has been processed. - -\layout Description - -hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -). - -\layout Description - -link The -\emph on -link -\emph default - member is used to link -\family typewriter -START -\family default - events with the -\family typewriter -END -\family default - or -\family typewriter -FAIL -\family default - event that signifies completion of the operation. - The -\emph on -link -\emph default - member will be the same for the two events associated with an operation. - The link member is also used to link an -\family typewriter -UNLINK -\family default - event with the event that caused the memory descriptor to be unlinked. -\layout Description - -sequence The sequence number for this event. - Sequence numbers are unique to each event. -\layout Comment - -The -\emph on -sequence -\emph default - member is the last member and is volatile to support SMP implementations. - When an event structure is filled in, the -\emph on -sequence -\emph default - member should be written after all other members have been updated. 
- Moreover, a memory barrier should be inserted between the updating of other - members and the updating of the -\emph on -sequence -\emph default - member. -\layout Subsection - -PtlEQAlloc -\begin_inset LatexCommand \label{sec:eqalloc} - -\end_inset - - -\layout LyX-Code - -int PtlEQAlloc( ptl_handle_ni_t interface, -\newline - ptl_size_t count, -\newline - ptl_handle_eq_t* handle ); -\layout Standard -\noindent -The -\emph on -PtlEQAlloc -\emph default - function is used to build an event queue. - -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - event queue. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -handle -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface with which the event queue will be associated. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -count -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The number of events that can be stored in the event queue. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - event queue. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQFree -\begin_inset LatexCommand \label{sec:eqfree} - -\end_inset - - -\layout LyX-Code - -int PtlEQFree( ptl_handle_eq_t eventq ); -\layout Standard -\noindent -The -\emph on -PtlEQFree -\emph default - function releases the resources associated with an event queue. - It is up to the user to ensure that no memory descriptors are associated - with the event queue once it is freed. - -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
- -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the event queue to be released. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQGet -\begin_inset LatexCommand \label{sec:eqget} - -\end_inset - - -\layout LyX-Code - -int PtlEQGet( ptl_handle_eq_t eventq, -\newline - ptl_event_t* event ); -\layout Standard -\noindent -The -\emph on -PtlEQGet -\emph default - function is a nonblocking function that can be used to get the next event - in an event queue. - The event is removed from the queue. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at - least one event between this event and the last event obtained (using -\emph on -PtlEQGet -\emph default - or -\emph on -PtlEQWait -\emph default -) from this event queue has been dropped due to limited space in the event - queue. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
- -\layout Description - -PTL_EQ_EMPTY Indicates that -\family typewriter -eventq -\family default - is empty or another thread is waiting on -\emph on -PtlEQWait -\emph default -. - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -event -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the event queue. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -event -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the values associated with - the next event in the event queue. 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQWait -\begin_inset LatexCommand \label{sec:eqwait} - -\end_inset - - -\layout LyX-Code - -int PtlEQWait( ptl_handle_eq_t eventq, -\newline - ptl_event_t* event ); -\layout Standard -\noindent -The -\emph on -PtlEQWait -\emph default - function can be used to block the calling process (thread) until there - is an event in an event queue. - This function also returns the next event in the event queue and removes - this event from the queue. - This is the only blocking operation in the Portals 3.2 API. - In the event that multiple threads are waiting on the same event queue, - PtlEQWait is guaranteed to wake exactly one thread, but the order in which - they are awakened is not specified. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at - least one event between this event and the last event obtained (using -\emph on -PtlEQGet -\emph default - or -\emph on -PtlEQWait -\emph default -) from this event queue has been dropped due to limited space in the event - queue. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -event -\family default - is not a legal address.
- -\layout Subsubsection - -Arguments -\layout Standard -\noindent - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the event queue to wait on. - The calling process (thread) will be blocked until -\family typewriter -eventq -\family default - is not empty. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -event -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the values associated with - the next event in the event queue. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -The Access Control Table -\begin_inset LatexCommand \label{sec:ac} - -\end_inset - - -\layout Standard - -Processes can use the access control table to control which processes are - allowed to perform operations on Portal table entries. 
- Each communication interface has a Portal table and an access control table. - The access control table for the default interface contains an entry at - index zero that allows all processes with the same user id to communicate. - Entries in the access control table can be manipulated using the -\emph on -PtlACEntry -\emph default - function. -\layout Subsection - -PtlACEntry -\begin_inset LatexCommand \label{sec:acentry} - -\end_inset - - -\layout LyX-Code - -int PtlACEntry( ptl_handle_ni_t interface, -\newline - ptl_ac_index_t index, -\newline - ptl_process_id_t matchid, -\newline - ptl_uid_t user_id, -\newline - ptl_pt_index_t portal ); -\layout Standard -\noindent -The -\emph on -PtlACEntry -\emph default - function can be used to update an entry in the access control table for - an interface. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_AC_INV_INDEX Indicates that -\family typewriter -index -\family default - is not a valid access control table index. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_PT_INV_INDEX Indicates that -\family typewriter -portal -\family default - is not a valid Portal table index. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the interface to use. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index of the entry in the access control table to update. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -matchid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the process(es) that are allowed to perform operations. - The constants -\family typewriter -PTL_PID_ANY -\family default - and -\family typewriter -PTL_NID_ANY -\family default - can be used to wildcard either of the ids in the -\family typewriter -ptl_process_id_t -\family default - structure. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -user_id -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the user that is allowed to perform operations. - The value -\family typewriter -PTL_UID_ANY -\family default - can be used to wildcard the user. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the Portal index(es) that can be used. - The value -\family typewriter -PTL_PT_INDEX_ANY -\family default - can be used to wildcard the Portal index. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Data Movement Operations -\begin_inset LatexCommand \label{sec:datamovement} - -\end_inset - - -\layout Standard - -The Portals API provides two data movement operations: -\emph on -PtlPut -\emph default - and -\emph on -PtlGet -\emph default -. -\layout Subsection - -PtlPut -\begin_inset LatexCommand \label{sec:put} - -\end_inset - - -\layout LyX-Code - -typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t; -\newline - -\newline -int PtlPut( ptl_handle_md_t mem_desc, -\newline - ptl_ack_req_t ack_req, -\newline - ptl_process_id_t target, -\newline - ptl_pt_index_t portal, -\newline - ptl_ac_index_t cookie, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_size_t offset, -\newline - ptl_hdr_data_t hdr_data ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_ack_req_t -\family default - are used to control whether an acknowledgement should be sent when the - operation completes (i.e., when the data has been written to a memory descriptor - of the -\family typewriter -target -\family default - process). 
- The value -\family typewriter -PTL_ACK_REQ -\family default - requests an acknowledgement, the value -\family typewriter -PTL_NOACK_REQ -\family default - requests that no acknowledgement should be generated. -\layout Standard - -The -\emph on -PtlPut -\emph default - function initiates an asynchronous put operation. - There are several events associated with a put operation: initiation of - the send on the local node ( -\family typewriter -PTL_EVENT_SEND_START -\family default -), completion of the send on the local node ( -\family typewriter -PTL_EVENT_SEND_END -\family default - or -\family typewriter -PTL_EVENT_SEND_FAIL -\family default -), and, when the send completes successfully, the receipt of an acknowledgement - ( -\family typewriter -PTL_EVENT_ACK -\family default -) indicating that the operation was accepted by the target. - These events will be logged in the event queue associated with the memory - descriptor ( -\family typewriter -mem_desc -\family default -) used in the put operation. - Using a memory descriptor that does not have an associated event queue - results in these events being discarded. - In this case, the application must have another mechanism (e.g., a higher - level protocol) for determining when it is safe to modify the memory region - associated with the memory descriptor. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -target -\family default - is not a valid process id. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="8" columns="3"> -<features> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor that describes the memory to be sent. - If the memory descriptor has an event queue associated with it, it will - be used to record events when the message has been sent (PTL_EVENT_SEND_START, - PTL_EVENT_SEND_END). - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ack_req -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Controls whether an acknowledgement event is requested. - Acknowledgements are only sent when they are requested by the initiating - process -\series bold -and -\series default - the memory descriptor has an event queue -\series bold -and -\series default - the target memory descriptor enables them. 
- Allowed constants: -\family typewriter -PTL_ACK_REQ -\family default -, -\family typewriter -PTL_NOACK_REQ -\family default -. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A process id for the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index in the remote Portal table. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index into the access control table of the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The match bits to use for message selection at the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The offset into the target memory descriptor (only used when the target - memory descriptor has the -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default - option set). -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -hdr_data -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -64 bits of user data that can be included in the message header. 
- This data is written to an event queue entry at the target if an event - queue is present on the matching memory descriptor. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlGet -\begin_inset LatexCommand \label{sec:get} - -\end_inset - - -\layout LyX-Code - -int PtlGet( ptl_handle_md_t mem_desc, -\newline - ptl_process_id_t target, -\newline - ptl_pt_index_t portal, -\newline - ptl_ac_index_t cookie, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_size_t offset ); -\layout Standard -\noindent -The -\emph on -PtlGet -\emph default - function initiates a remote read operation. - There are two event pairs associated with a get operation: when the data - is sent from the remote node, a -\family typewriter -PTL_EVENT_GET_{START|END} -\family default - event pair is registered on the remote node; and when the data is returned - from the remote node, a -\family typewriter -PTL_EVENT_REPLY_{START|END} -\family default - event pair is registered on the local node. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -target -\family default - is not a valid process id. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="6" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor that describes the memory into which - the requested data will be received. - The memory descriptor can have an event queue associated with it to record - events, such as when the message receive has started ( -\family typewriter -PTL_EVENT_REPLY -\family default -_ -\family typewriter -START -\family default -). -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A process id for the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index in the remote Portal table. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index into the access control table of the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The match bits to use for message selection at the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The offset into the target memory descriptor (only used when the target - memory descriptor has the -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default - option set). -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Summary -\layout Standard - - -\begin_inset LatexCommand \label{sec:summary} - -\end_inset - - We conclude this section by summarizing the names introduced by the Portals - 3.2 API. - We start by summarizing the names of the types introduced by the API. - This is followed by a summary of the functions introduced by the API. - This, in turn, is followed by a summary of the function return codes. - Finally, we conclude with a summary of the other constant values introduced - by the API. -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - - presents a summary of the types defined by the Portals API. - The first column in this table gives the type name, the second column gives - a brief description of the type, the third column identifies the section - where the type is defined, and the fourth column lists the functions that - have arguments of this type. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Types Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:types} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\noindent - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="25" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2in"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2.2in"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Sect -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Functions -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for an access control table -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:index-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" 
bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlACEntry, PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -acknowledgement request types -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlPut -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -kinds of events -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -information about events -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_seq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -event sequence number -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_any_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for any object -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIHandle -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_eq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard 
-\noindent -handles for event queues -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for memory descriptors -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert, - PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_me_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for match entries -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset 
Text - -\layout Standard - - -\family typewriter -ptl_handle_ni_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for network interfaces -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut, - PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_nid_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -node identifiers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlGetId,PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -process identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlGetId, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -user identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlGetUid, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -insertion position (before or after) -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_interface_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -identifiers for network interfaces -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -match (and ignore) bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mb-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -memory descriptors -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ni_fail_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -network interface-specific failures -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -process identifiers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:pid-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for Portal tables -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:index-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -sizes -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:size-t} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQAlloc, 
PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for status registers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_value_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -values in status registers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -unlink options -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, 
PtlMEInsert, PtlMDAttach -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - - presents a summary of the functions defined by the Portals API. - The first column in this table gives the name for the function, the second - column gives a brief description of the operation implemented by the function, - and the third column identifies the section where the function is defined. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Functions Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:func} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="24" columns="3"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Operation -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Section -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlACEntry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - update an entry in an access control table -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ac} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQAlloc -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQGet -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the next event from an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQFree -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - release the resources for an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQWait -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - wait for a new event in an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - 
-\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlFini -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - shutdown the Portals API -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:init} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlGet -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - perform a get operation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlGetId -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the id for the current process -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlInit -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - initialize the Portals API -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:init} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - PtlMDAttach -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a memory descriptor and attach it to a match entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDBind -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a free-floating memory descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDUnlink -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - remove a memory descriptor from a list and release its resources -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDUpdate -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - update a memory descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - 
-\layout Standard - - PtlMEAttach -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -create a match entry and attach it to a Portal table -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMEAttachAny -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -create a match entry and attach it to a free Portal table entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:attachany} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMEInsert -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a match entry and insert it in a list -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMEUnlink -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - remove a match entry from a list and release its resources -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - PtlNIDist -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the distance to another process -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIFini -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - shutdown a network interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIHandle -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the network interface handle for an object -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIInit -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - initialize a network interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIStatus -\end_inset -</cell> -<cell 
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - read a network interface status register -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlPut -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - perform a put operation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - - summarizes the return codes used by functions defined by the Portals API. - All of these constants are integer values. - The first column of this table gives the symbolic name for the constant, - the second column gives a brief description of the value, and the third - column identifies the functions that can return this value. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Function Return Codes for the Portals 3.2 API -\begin_inset LatexCommand \label{tab:retcodes} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="27" columns="3"> -<features> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2.6in"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Functions -\series default - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_AC_INV_INDEX -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid access control table index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_DROPPED -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> 
-\begin_inset Text - -\layout Standard - -at least one event has been dropped -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_EMPTY -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -no events available in an event queue -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlEQGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -error during initialization or cleanup -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlInit, PtlFini -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ILL_MD -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -illegal memory descriptor values -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach, PtlMDBind, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INIT_DUP -\end_inset -</cell> -<cell 
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -duplicate initialization of an interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INIT_INV -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -initialization of an invalid interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INUSE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -the ME already has an MD -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_ASIZE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid access control table size -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_EQ -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - 
-invalid event queue handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDUpdate, PtlEQFree, PtlEQGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_HANDLE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIHandle -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_MD -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid memory descriptor handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDUnlink, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_ME -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid match entry handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_NI -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid network interface handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset 
Text - -\layout Standard -\noindent -PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_PROC -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid process identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_PTINDEX -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid Portal table index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMEAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_REG -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid status register -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_SR_INDX -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid status register index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - 
PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ML_TOOLONG -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match list too long -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMEAttach, PtlMEInsert -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_INUSE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -MD has pending operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMDUnlink -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOINIT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -uninitialized API -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\emph on -all -\emph default -, except PtlInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOSPACE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insufficient memory -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOUPDATE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - no update was performed -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PT_FULL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -Portal table is full -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMEAttachAny -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_OK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - success -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\emph on -all -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_SEGV -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -addressing violation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate, - PtlEQAlloc, PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand 
\ref{tab:oconsts} - -\end_inset - - summarizes the remaining constant values introduced by the Portals API. - The first column in this table presents the symbolic name for the constant, - the second column gives a brief description of the value, the third column - identifies the type for the value, and the fourth column identifies the - sections in which the value is mentioned. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Other Constants Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:oconsts} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="36" columns="5"> -<features> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Base type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Intr. -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Ref. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ACK_REQ -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -request an acknowledgement -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_NONE -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -a NULL event queue handle -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_eq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout 
Standard - - -\family typewriter -PTL_EVENT_GET_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter 
-ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset 
LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -reply event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout 
Standard - -reply event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -reply event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - 
-\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_UNLINK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -unlink event -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for process id fields -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for node id fields -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_nid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_UID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for user id -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> 
-</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_IFACE_DEFAULT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -default interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_interface_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INS_AFTER -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insert after -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INS_BEFORE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insert before -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\family 
default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_ACK_DISABLE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to disable acknowledgements -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_MANAGE_REMOTE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable the use of remote offsets -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:get} - 
-\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_OP_GET -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable get operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_OP_PUT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable put operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_THRESH_INF -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -infinite threshold for a memory descriptor -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_TRUNCATE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable truncation of a request -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOACK_REQ -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -request no acknowledgement -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PT_INDEX_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for Portal indexes -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_RETAIN -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -disable unlinking -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_SR_DROP_COUNT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -index for the dropped count register -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_index_t -\end_inset -</cell> -<cell 
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_UNLINK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -enable unlinking -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Chapter - -The Semantics of Message Transmission -\begin_inset LatexCommand \label{sec:semantics} - -\end_inset - - -\layout Standard - -The portals API uses four types of messages: put requests, acknowledgements, - get requests, and replies. - In this section, we describe the information passed on the wire for each - type of message. - We also describe how this information is used to process incoming messages. -\layout Section - -Sending Messages -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:put-wire} - -\end_inset - - summarizes the information that is transmitted for a put request. 
- The first column provides a descriptive name for the information, the second - column provides the type for this information, the third column identifies - the source of the information, and the fourth column provides additional - notes. - Most information that is transmitted is obtained directly from the -\emph on -PtlPut -\emph default - operation. - Notice that the handle for the memory descriptor used in the -\emph on -PtlPut -\emph default - operation is transmitted even though this value cannot be interpreted by - the target. - A value of anything other than -\family typewriter -PTL_MD_NONE -\family default -, is interpreted as a request for an acknowledgement. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Put Request -\begin_inset LatexCommand \label{tab:put-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="12" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -\emph on -PtlPut -\emph default - arg -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -indicates a put request -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -user -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell 
alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - 
-match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -no ack if -\family typewriter -PTL_MD_NONE -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -length -\family default - member -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -data -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family roman -\emph on -bytes -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -start -\family default - and -\family typewriter -length -\family default - members -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:ack-wire} - -\end_inset - - summarizes the information transmitted in an acknowledgement. - Most of the information is simply echoed from the put request. - Notice that the initiator and target are obtained directly from the put - request, but are swapped in generating the acknowledgement. - The only new piece of information in the acknowledgement is the manipulated - length which is determined as the put request is satisfied. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in an Acknowledgement -\begin_inset LatexCommand \label{tab:ack-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="10" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Put Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - 
indicates an acknowledgement -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - match bits -\end_inset 
-</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - requested length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - length -\end_inset -</cell> -<cell 
alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - manipulated length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - obtained from the operation -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:get-wire} - -\end_inset - - summarizes the information that is transmitted for a get request. - Like the information transmitted in a put request, most of the information - transmitted in a get request is obtained directly from the -\emph on -PtlGet -\emph default - operation. - Unlike put requests, get requests do not include the event queue handle. - In this case, the reply is generated whenever the operation succeeds and - the memory descriptor must not be unlinked until the reply is received. - As such, there is no advantage to explicitly sending the event queue handle. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Get Request -\begin_inset LatexCommand \label{tab:get-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="11" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -\emph on -PtlGet -\emph default - argument -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - 
-\layout Standard - -indicates a get operation -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -user -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset 
-</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -length -\family default - member -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:reply-wire} - -\end_inset - - summarizes the information transmitted in a reply. - Like an acknowledgement, most of the information is simply echoed from - the get request. - The initiator and target are obtained directly from the get request, but - are swapped in generating the reply.
- The only new pieces of information in the reply are the manipulated length - and the data, which are determined as the get request is satisfied. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Reply -\begin_inset LatexCommand \label{tab:reply-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="11" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Get Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text -
-\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -indicates a reply -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true"
usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -requested length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -manipulated length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -obtained from the operation -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -data -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\emph on -bytes -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -obtained from the operation -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Section - -Receiving Messages -\begin_inset LatexCommand \label{sec:receiving} - -\end_inset - - -\layout Standard - -When an incoming message arrives on a network interface, the communication - system first checks that the target process identified in the request is - a valid process that has initialized the network interface (i.e., that the - target process has a valid Portal table). 
- If this test fails, the communication system discards the message and increment -s the dropped message count for the interface. - The remainder of the processing depends on the type of the incoming message. - Put and get messages are subject to access control checks and translation - (searching a match list), while acknowledgement and reply messages bypass - the access control checks and the translation step. -\layout Standard - -Acknowledgement messages include a handle for the memory descriptor used - in the original -\emph on -PtlPut -\emph default - operation. - This memory descriptor will identify the event queue where the event should - be recorded. - Upon receipt of an acknowledgement, the runtime system only needs to confirm - that the memory descriptor and event queue still exist and that there is - space for another event. - Should any of these conditions fail, the message is simply discarded - and the dropped message count for the interface is incremented. - Otherwise, the system builds an acknowledgement event from the information - in the acknowledgement message and adds it to the event queue. -\layout Standard - -Reception of reply messages is also relatively straightforward. - Each reply message includes a handle for a memory descriptor. - If this descriptor exists, it is used to receive the message. - A reply message will be dropped if the memory descriptor identified in - the request doesn't exist. - In this case, the dropped message count for the interface is - incremented. - This is the only reason for dropping reply messages. - Every memory descriptor accepts and truncates incoming reply messages, - eliminating the other potential reasons for rejecting a reply message. -\layout Standard - -The critical step in processing an incoming put or get request involves - mapping the request to a memory descriptor. - This step starts by using the Portal index in the incoming request to identify - a list of match entries.
- This list of match entries is searched in order until a match entry is - found whose match criteria match the match bits in the incoming request - and whose memory descriptor accepts the request. -\layout Standard - -Because acknowledgement and reply messages are generated in response to requests - made by the process receiving these messages, the checks performed by the - runtime system for acknowledgements and replies are minimal. - In contrast, put and get messages are generated by remote processes and - the checks performed for these messages are more extensive. - Incoming put or get messages may be rejected because: -\layout Itemize - -the Portal index supplied in the request is not valid; -\layout Itemize - -the cookie supplied in the request is not a valid access control entry; - -\layout Itemize - -the access control entry identified by the cookie does not match the identifier - of the requesting process; -\layout Itemize - -the access control entry identified by the cookie does not - match the Portal index supplied in the request; or -\layout Itemize - -the match bits supplied in the request do not match any of the match entries - with a memory descriptor that accepts the request. - -\layout Standard - -In all cases, if the message is rejected, the incoming message is discarded - and the dropped message count for the interface is incremented. -\layout Standard - -A memory descriptor may reject an incoming request for any of the following - reasons: -\layout Itemize - -the -\family typewriter -PTL_MD_PUT -\family default - or -\family typewriter -PTL_MD_GET -\family default - option has not been enabled and the operation is put or get, respectively; - -\layout Itemize - -the length specified in the request is too long for the memory descriptor - and the -\family typewriter -PTL_MD_TRUNCATE -\family default - option has not been enabled.
-\layout Chapter - -Examples -\begin_inset LatexCommand \label{sec:examples} - -\end_inset - - -\layout Comment - -The examples presented in this chapter have not been updated to reflect - the current API. -\layout Standard - -In this section we present several examples to illustrate expected usage - patterns for the Portals 3.2 API. - The first example describes how to implement parallel servers using the - features of the Portals 3.2 API. - This example covers the access control list and the use of remote managed - offsets. - The second example presents an approach to dealing with dropped requests. - This example covers aspects of match lists and memory descriptors. - The final example covers message reception in MPI. - This example illustrates more sophisticated uses of matching and a procedure - to update a memory descriptor. -\layout Section - -Parallel File Servers -\begin_inset LatexCommand \label{sec:expfs} - -\end_inset - - -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:file} - -\end_inset - - illustrates the logical structure of a parallel file server. - In this case, the parallel server consists of four servers that stripe - application data across four disks. - We would like to present applications with the illusion that the file server - is a single entity. - We will assume that all of the processes that constitute the parallel server - have the same user id.
-\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename file.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 196pt - lyxheight 147pt -\end_inset - - -\layout Caption - -Parallel File Server -\begin_inset LatexCommand \label{fig:file} - -\end_inset - - -\end_inset - - -\layout Standard - -When an application establishes a connection to the parallel file server, - it will allocate a Portal and access control list entry for communicating - with the server. - The access control list entry will include the Portal and match any process - in the parallel file server's, so all of the file server processes will - have access to the portal. - The Portal information and access control entry will be sent to the file - server at this time. - If the application and server need to have multiple, concurrent I/O operations, - they can use additional portals or match entries to keep the operations - from interfering with one another. -\layout Standard - -When an application initiates an I/O operation, it first builds a memory - descriptor that describes the memory region involved in the operation. - This memory descriptor will enable the appropriate operation (put for read - operations and get for write operations) and enable the use of remote offsets - (this lets the servers decide where their data should be placed in the - memory region). - After creating the memory descriptor and linking it into the appropriate - Portal entry, the application sends a read or write request (using -\emph on -PtlPut -\emph default -) to one of the file server processes. - The file server processes can then use put or get operations with the appropria -te offsets to fill or retrieve the contents of the application's buffer. 
- To know when the operation has completed, the application can add an event - queue to the memory descriptor and add up the lengths of the remote operations - until the sum is the size of the requested I/O operation. -\layout Section - -Dealing with Dropped Requests -\begin_inset LatexCommand \label{sec:exdrop} - -\end_inset - - -\layout Standard - -If a process does not anticipate unexpected requests, they will be discarded. - Applications using the Portals API can query the dropped count for the - interface to determine the number of requests that have been dropped (see - Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - -). - While this approach minimizes resource consumption, it does not provide - information that might be critical in debugging the implementation of a - higher level protocol. -\layout Standard - -To keep track of more information about dropped requests, we use a memory - descriptor that truncates each incoming request to zero bytes and logs - the -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - operations in an event queue. - Note that the operations are not dropped in the Portals sense, because - the operation succeeds. -\layout Standard - -The following code fragment illustrates an implementation of this approach. - In this case, we assume that a thread is launched to execute the function - -\family typewriter -watch_drop -\family default -. - This code starts by building an event queue to log truncated operations - and a memory descriptor to truncate the incoming requests. - This example only captures -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - requests for a single portal. - In a more realistic situation, the memory descriptor would be appended - to the match list for every portal. 
- We also assume that the thread is capable of keeping up with the -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - requests. - If this is not the case, we could use a finite threshold on the memory - descriptor to capture the first few dropped requests. -\layout LyX-Code - - -\size small -#include <stdio.h> -\newline -#include <stdlib.h> -\newline -#include <portals.h> -\newline - -\newline -#define DROP_SIZE 32 /* number of dropped requests to track */ -\newline - -\newline -int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) { -\newline - ptl_handle_eq_t drop_events; -\newline - ptl_event_t event; -\newline - ptl_handle_md_t drop_em; -\newline - ptl_md_t drop_desc; -\newline - ptl_process_id_t any_proc; -\newline - ptl_handle_me_t match_any; -\newline - -\newline - /* create the event queue */ -\newline - if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) { -\newline - fprintf( stderr, "Couldn't create the event queue -\backslash -n" ); -\newline - exit( 1 ); -\newline - } -\newline - -\newline - /* build a match entry */ -\newline - any_proc.nid = PTL_ID_ANY; -\newline - any_proc.pid = PTL_ID_ANY; -\newline - PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN, -\newline - &match_any ); -\newline - -\newline - /* create the memory descriptor */ -\newline - drop_desc.start = NULL; -\newline - drop_desc.length = 0; -\newline - drop_desc.threshold = PTL_MD_THRESH_INF; -\newline - drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE; -\newline - drop_desc.user_ptr = NULL; -\newline - drop_desc.eventq = drop_events; -\newline - if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) { -\newline - fprintf( stderr, "Couldn't create the memory descriptor -\backslash -n" ); -\newline - exit( 1 ); -\newline - } -\newline - -\newline - /* watch for "dropped" requests */ -\newline - while( 1 ) { -\newline - if( PtlEQWait( drop_events, &event ) != PTL_OK ) break; -\newline - fprintf( 
stderr, "Dropped request from gid = event.initiator.gid, - event.initiator.rid ); -\newline - } -\newline -} -\layout Section - -Message Transmission in MPI -\begin_inset LatexCommand \label{sec:exmpi} - -\end_inset - - -\layout Standard - -We conclude this section with a fairly extensive example that describes - an approach to implementing message transmission for MPI. - Like many MPI implementations, we distinguish two message transmission - protocols: a short message protocol and a long message protocol. - We use the constant -\family typewriter -MPI_LONG_LENGTH -\family default - to determine the size of a long message. -\layout Standard - -For small messages, the sender simply sends the message and presumes that - the message will be received (i.e., the receiver has allocated a memory region - to receive the message body). - For large messages, the sender also sends the message, but does not presume - that the message body will be saved. - Instead, the sender builds a memory descriptor for the message and enables - get operations on this descriptor. - If the target does not save the body of the message, it will record an - event for the put operation. - When the process later issues a matching MPI receive, it will perform a - get operation to retrieve the body of the message. -\layout Standard - -To facilitate receive side matching based on the protocol, we use the most - significant bit in the match bits to indicate the protocol: 1 for long - messages and 0 for short messages. -\layout Standard - -The following code presents a function that implements the send side of - the protocol. - The global variable -\family typewriter -EndGet -\family default - is the last match entry attached to the Portal index used for posting long - messages. - This entry does not match any incoming requests (i.e., the memory descriptor - rejects all get operations) and is built during initialization of the MPI - library. 
- The other global variable, -\family typewriter -MPI_NI -\family default -, is a handle for the network interface used by the MPI implementation. -\layout LyX-Code - - -\size small -extern ptl_handle_me_t EndGet; -\newline -extern ptl_handle_ni_t MPI_NI; -\newline - -\newline -void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq, -\newline - ptl_process_id target, ptl_match_bits_t match ) -\newline -{ -\newline - ptl_handle_md_t send_handle; -\newline - ptl_md_t mem_desc; -\newline - ptl_ack_req_t want_ack; -\newline - -\newline - mem_desc.start = buf; -\newline - mem_desc.length = len; -\newline - mem_desc.threshold = 1; -\newline - mem_desc.options = PTL_MD_GET_OP; -\newline - mem_desc.user_ptr = data; -\newline - mem_desc.eventq = eventq; -\newline - -\newline - if( len >= MPI_LONG_LENGTH ) { -\newline - ptl_handle_me_t me_handle; -\newline - -\newline - /* add a match entry to the end of the get list */ -\newline - PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet, - &me_handle ); -\newline - PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL ); -\newline - -\newline - /* we want an ack for long messages */ -\newline - want_ack = PTL_ACK_REQ; -\newline - -\newline - /* set the protocol bit to indicate that this is a long message - */ -\newline - match |= 1<<63; -\newline - } else { -\newline - /* we don't want an ack for short messages */ -\newline - want_ack = PTL_ACK_REQ; -\newline - -\newline - /* set the protocol bit to indicate that this is a short message - */ -\newline - match &= ~(1<<63); -\newline - } -\newline - -\newline - /* create a memory descriptor and send it */ -\newline - PtlMDBind( MPI_NI, mem_desc, &send_handle ); -\newline - PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match, - 0 ); -\newline -} -\layout Standard - -The -\emph on -MPISend -\emph default - function returns as soon as the message has been scheduled for transmission. 
- The event queue argument, -\family typewriter -eventq -\family default -, can be used to determine the disposition of the message. - Assuming that -\family typewriter -eventq -\family default - is not -\family typewriter -PTL_EQ_NONE -\family default -, a -\family typewriter -PTL_EVENT_SENT -\family default - event will be recorded for each message as the message is transmitted. - For small messages, this is the only event that will be recorded in -\family typewriter -eventq -\family default -. - In contrast, long messages include an explicit request for an acknowledgement. - If the -\family typewriter -target -\family default - process has posted a matching receive, the acknowledgement will be sent - as the message is received. - If a matching receive has not been posted, the message will be discarded - and no acknowledgement will be sent. - When the -\family typewriter -target -\family default - process later issues a matching receive, the receive will be translated - into a get operation and a -\family typewriter -PTL_EVENT_GET -\family default - event will be recorded in -\family typewriter -eventq -\family default -. -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:mpi} - -\end_inset - - illustrates the organization of the match list used for receiving MPI messages. - The initial entries (not shown in this figure) would be used to match the - MPI receives that have been preposted by the application. - The preposted receives are followed by a match entry, -\emph on -RcvMark -\emph default -, that marks the boundary between preposted receives and the memory descriptors - used for -\begin_inset Quotes eld -\end_inset - -unexpected -\begin_inset Quotes erd -\end_inset - - messages. 
- The -\emph on -RcvMark -\emph default - entry is followed by a small collection of match entries that match unexpected - -\begin_inset Quotes eld -\end_inset - -short -\begin_inset Quotes erd -\end_inset - - messages, i.e., messages that have a 0 in the most significant bit of their - match bits. - The memory descriptors associated with these match entries will append - the incoming message to the associated memory descriptor and record an - event in an event queue for unexpected messages. - The unexpected short message matching entries are followed by a match entry - that will match messages that were not matched by the preceding match entries, - i.e., the unexpected long messages. - The memory descriptor associated with this match entry truncates the message - body and records an event in the event queue for unexpected messages. - Note that all of the memory descriptors used for unexpected messages share - a common event queue. - This makes it possible to process the unexpected messages in the order - in which they arrived, regardless of whether they are short or long. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename mpi.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 389pt - lyxheight 284pt -\end_inset - - -\layout Caption - -Message Reception in MPI -\begin_inset LatexCommand \label{fig:mpi} - -\end_inset - - -\end_inset - - -\layout Standard - -When the local MPI process posts an MPI receive, we must first search the - events in the unexpected message queue to see if a matching message has already - arrived. - If no matching message is found, a match entry for the receive is inserted - before the -\emph on -RcvMark -\emph default - entry--after the match entries for all of the previously posted receives - and before the match entries for the unexpected messages.
- This ensures that preposted receives are matched in the order that they - were posted (a requirement of MPI). - -\layout Standard - -While this strategy respects the temporal semantics of MPI, it introduces - a race condition: a matching message might arrive after the events in the - unexpected message queue have been searched, but before the match entry - for the receive has been inserted in the match list. - -\layout Standard - -To avoid this race condition we start by setting the -\family typewriter -threshold -\family default - of the memory descriptor to 0, making the descriptor inactive. - We then insert the match entry into the match list and proceed to search - the events in the unexpected message queue. - A matching message that arrives as we are searching the unexpected message - queue will not be accepted by the memory descriptor and, if not matched - by an earlier match list element, will add an event to the unexpected message - queue. - After searching the events in the unexpected message queue, we update the - memory descriptor, setting the threshold to 1 to activate the memory descriptor. - This update is predicated by the condition that the unexpected message - queue is empty. - We repeat the process of searching the unexpected message queue until the - update succeeds. -\layout Standard - -The following code fragment illustrates this approach. - Because events must be removed from the unexpected message queue to be - examined, this code fragment assumes the existence of a user managed event - list, -\family typewriter -Rcvd -\family default -, for the events that have already been removed from the unexpected message - queue. - In an effort to keep the example focused on the basic protocol, we have - omitted the code that would be needed to manage the memory descriptors - used for unexpected short messages. - In particular, we simply leave messages in these descriptors until they - are received by the application. 
- In a robust implementation, we would introduce code to ensure that short - unexpected messages are removed from these memory descriptors so that they - can be re-used. -\layout LyX-Code - - -\size small -extern ptl_handle_eq_t UnexpQueue; -\newline -extern ptl_handle_me_t RcvMark; -\newline -extern ptl_handle_me_t ShortMatch; -\newline - -\newline -typedef struct event_list_tag { -\newline - ptl_event_t event; -\newline - struct event_list_tag* next; -\newline -} event_list; -\newline - -\newline -extern event_list Rcvd; -\newline - -\newline -void AppendRcvd( ptl_event_t event ) -\newline -{ -\newline - /* append an event onto the Rcvd list */ -\newline -} -\newline - -\newline -int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi -ts_t match, -\newline - ptl_match_bits_t ignore, ptl_event_t *event ) -\newline -{ -\newline - /* Search the Rcvd event queue, looking for a message that matches the - requested message. -\newline - * If one is found, remove the event from the Rcvd list and return it. 
- */ -\newline -} -\newline - -\newline -typedef enum { RECEIVED, POSTED } receive_state; -\newline - -\newline -receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event, - ptl_md_t md_buf ) -\newline -{ -\newline - ptl_md_t md_buf; -\newline - ptl_handle_me_t me_handle; -\newline - -\newline - if( event.rlength >= MPI_LONG_LENGTH ) { -\newline - PtlMDBind( MPI_NI, md_buf, &md_handle ); -\newline - PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX, - md_handle ); -\newline - return POSTED; -\newline - } else { -\newline - /* copy the message */ -\newline - if( event.mlength < *length ) *length = event.mlength; -\newline - memcpy( buf, (char*)event.md_desc.start+event.offset, *length ); -\newline - return RECEIVED; -\newline - } -\newline -} -\newline - -\newline -receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle -_eq_t eventq, -\newline - ptl_process_id_t sender, ptl_match_bits_t match, - ptl_match_bits_t ignore ) -\newline -{ -\newline - ptl_md_t md_buf; -\newline - ptl_handle_md_t md_handle; -\newline - ptl_handle_me_t me_handle; -\newline - ptl_event_t event; -\newline - -\newline - /* build a memory descriptor for the receive */ -\newline - md_buf.start = buf; -\newline - md_buf.length = *len; -\newline - md_buf.threshold = 0; /* temporarily disabled */ -\newline - md_buf.options = PTL_MD_PUT_OP; -\newline - md_buf.user_ptr = MPI_data; -\newline - md_buf.eventq = eventq; -\newline - -\newline - /* see if we have already received the message */ -\newline - if( SearchRcvd(buf, len, sender, match, ignore, &event) ) -\newline - return CopyMsg( buf, len, event, md_buf ); -\newline - -\newline - /* create the match entry and attach the memory descriptor */ -\newline - PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark, - &me_handle); -\newline - PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle ); -\newline - -\newline - md_buf.threshold = 1; -\newline - do -\newline - if( 
PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) { -\newline - if( MPIMatch(event, match, ignore, sender) ) { -\newline - return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset, - md_buf ); -\newline - } else { -\newline - AppendRcvd( event ); -\newline - } -\newline - } -\newline - while( PtlMDUpdate(md_handle, NULL, &md_buf, unexp_queue) == PTL_NOUPDATE - ); -\newline - return POSTED; -\newline -} -\layout Chapter* - -Acknowledgments -\layout Standard - -Several people have contributed to the philosophy, design, and implementation - of the Portals message passing architecture as it has evolved. - We acknowledge the following people for their contributions: Al Audette, - Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike - Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke, - Dave van Dresser, Lee Ward, and Stephen Wheat. - -\layout Standard - - -\begin_inset LatexCommand \BibTeX[ieee]{portals3} - -\end_inset - - -\the_end diff --git a/lnet/doc/put.fig b/lnet/doc/put.fig deleted file mode 100644 index 5235b6d7880836321ca385bdc4bedfa30c92e30b..0000000000000000000000000000000000000000 --- a/lnet/doc/put.fig +++ /dev/null @@ -1,32 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 1350 900 2175 1200 -4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001 -4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001 --6 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 1275 2700 1725 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 900 525 2700 1200 -2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 - 0 300 1200 300 1200 2250 0 2250 0 300 -2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 - 2400 300 3600 300 3600 2250 2400 2250 2400 300 -2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2 - 0 0 1.00 60.00 120.00 - 2699 1788 899 1938 -4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001 -4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001 -4 1 0 100 0 0 10 0.0000 
0 135 1170 1800 2175 Acknowledgement\001 -4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001 -4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 -4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 diff --git a/lnet/include/.cvsignore b/lnet/include/.cvsignore deleted file mode 100644 index 94d3790678c916e364e1ab73c431e7e3c2d88b8b..0000000000000000000000000000000000000000 --- a/lnet/include/.cvsignore +++ /dev/null @@ -1,6 +0,0 @@ -config.h -stamp-h -stamp-h1 -stamp-h.in -Makefile -Makefile.in diff --git a/lnet/include/Makefile.am b/lnet/include/Makefile.am deleted file mode 100644 index 006180b83629fa90d694582309745038d9c35e16..0000000000000000000000000000000000000000 --- a/lnet/include/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = libcfs lnet - -EXTRA_DIST = cygwin-ioctl.h diff --git a/lnet/include/cygwin-ioctl.h b/lnet/include/cygwin-ioctl.h deleted file mode 100644 index 8a33957adbd9870e3ef354cdc7c7da3c68fdb98d..0000000000000000000000000000000000000000 --- a/lnet/include/cygwin-ioctl.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * linux/ioctl.h for Linux by H.H. Bergman. - */ - -#ifndef _ASMI386_IOCTL_H -#define _ASMI386_IOCTL_H - -/* ioctl command encoding: 32 bits total, command in lower 16 bits, - * size of the parameter structure in the lower 14 bits of the - * upper 16 bits. - * Encoding the size of the parameter structure in the ioctl request - * is useful for catching programs compiled with old versions - * and to avoid overwriting user space outside the user buffer area. - * The highest 2 bits are reserved for indicating the ``access mode''. - * NOTE: This limits the max parameter size to 16kB -1 ! - */ - -/* - * The following is for compatibility across the various Linux - * platforms. The i386 ioctl numbering scheme doesn't really enforce - * a type field. De facto, however, the top 8 bits of the lower 16 - * bits are indeed used as a type field, so we might just as well make - * this explicit here. 
Please be sure to use the decoding macros - * below from now on. - */ -#undef _IO -#undef _IOR -#undef _IOW -#undef _IOC -#undef IOC_IN -#undef IOC_OUT - -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 14 -#define _IOC_DIRBITS 2 - -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - -#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits. - */ -#define _IOC_NONE 0U -#define _IOC_WRITE 1U -#define _IOC_READ 2U - -#define _IOC(dir,type,nr,size) \ - (((dir) << _IOC_DIRSHIFT) | \ - ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | \ - ((size) << _IOC_SIZESHIFT)) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode ioctl numbers.. */ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* ...and for the drivers/sound files... 
*/ - -#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) -#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) -#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) -#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) -#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) - -#endif /* _ASMI386_IOCTL_H */ diff --git a/lnet/include/libcfs/.cvsignore b/lnet/include/libcfs/.cvsignore deleted file mode 100644 index 3dda72986fc5af262451a760393b3a7065938c80..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile.in -Makefile diff --git a/lnet/include/libcfs/Makefile.am b/lnet/include/libcfs/Makefile.am deleted file mode 100644 index 2874a52eab0496d10425560ec9220c29bcd70518..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -SUBDIRS := linux -if DARWIN -SUBDIRS += darwin -endif -DIST_SUBDIRS := $(SUBDIRS) - -EXTRA_DIST := curproc.h kp30.h libcfs.h list.h lltrace.h \ - portals_utils.h types.h user-lock.h user-prim.h user-time.h diff --git a/lnet/include/libcfs/curproc.h b/lnet/include/libcfs/curproc.h deleted file mode 100644 index 6495c661d31407d7151650b65cdb07a4c7fbd3d3..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/curproc.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre curproc API declaration - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General - * Public License for more details. You should have received a copy of the GNU - * General Public License along with Lustre; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#ifndef __LIBCFS_CURPROC_H__ -#define __LIBCFS_CURPROC_H__ - -#ifdef __KERNEL__ -/* - * Portable API to access common characteristics of "current" UNIX process. - * - * Implemented in portals/include/libcfs/<os>/ - */ -uid_t cfs_curproc_uid(void); -gid_t cfs_curproc_gid(void); -uid_t cfs_curproc_fsuid(void); -gid_t cfs_curproc_fsgid(void); -pid_t cfs_curproc_pid(void); -int cfs_curproc_groups_nr(void); -int cfs_curproc_is_in_groups(gid_t group); -void cfs_curproc_groups_dump(gid_t *array, int size); -mode_t cfs_curproc_umask(void); -char *cfs_curproc_comm(void); - - -/* - * Plus, platform-specific constant - * - * CFS_CURPROC_COMM_MAX, - * - * and opaque scalar type - * - * cfs_kernel_cap_t - */ -cfs_kernel_cap_t cfs_curproc_cap_get(void); -void cfs_curproc_cap_set(cfs_kernel_cap_t cap); -#endif - -/* __LIBCFS_CURPROC_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/darwin/.cvsignore b/lnet/include/libcfs/darwin/.cvsignore deleted file mode 100644 index 3dda72986fc5af262451a760393b3a7065938c80..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile.in -Makefile diff --git a/lnet/include/libcfs/darwin/Makefile.am b/lnet/include/libcfs/darwin/Makefile.am deleted file mode 100644 index f2f217a2deb6a5b140ba1e17f7ad0093f74f29a9..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -EXTRA_DIST := darwin-mem.h darwin-types.h libcfs.h portals_utils.h \ - darwin-fs.h darwin-prim.h darwin-utils.h lltrace.h \ - darwin-lock.h darwin-sync.h darwin-tcpip.h 
kp30.h diff --git a/lnet/include/libcfs/darwin/darwin-fs.h b/lnet/include/libcfs/darwin/darwin-fs.h deleted file mode 100644 index 5eed9efe534c798bf3615f210af15182c154735c..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-fs.h +++ /dev/null @@ -1,197 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Implementation of standard file system interfaces for XNU kernel. - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ -#ifndef __LIBCFS_DARWIN_FS_H__ -#define __LIBCFS_DARWIN_FS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ - -#include <sys/types.h> -#include <sys/systm.h> - -#include <sys/kernel.h> -#include <sys/file.h> -#include <sys/time.h> -#include <sys/filedesc.h> -#include <sys/mount.h> -#include <sys/stat.h> -#include <sys/sysctl.h> -#include <sys/ubc.h> -#include <sys/mbuf.h> -#include <sys/namei.h> -#include <sys/fcntl.h> -#include <sys/lockf.h> -#include <stdarg.h> - -#include <mach/mach_types.h> -#include <mach/time_value.h> -#include <kern/clock.h> -#include <sys/param.h> -#include <IOKit/system.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-lock.h> -#include <libcfs/darwin/darwin-mem.h> -#include <libcfs/list.h> - -/* - * File operating APIs in kernel - */ -#ifdef __DARWIN8__ -/* - * Kernel file descriptor - */ -typedef struct cfs_kern_file { - int f_flags; - vnode_t f_vp; - vfs_context_t f_ctxt; -} cfs_file_t; - -#else - -typedef struct file cfs_file_t; - -#endif - -int kern_file_size(cfs_file_t *fp, off_t *size); -#define cfs_filp_size(fp) \ - ({ \ - off_t __size; \ - kern_file_size((fp), &__size); \ - __size; \ - }) -#define cfs_filp_poff(fp) (NULL) - -cfs_file_t *kern_file_open(const char *name, int flags, int mode, int *err); -int kern_file_close(cfs_file_t *fp); -int kern_file_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos); -int kern_file_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos); -int kern_file_sync(cfs_file_t *fp); - -#define cfs_filp_open(n, f, m, e) kern_file_open(n, f, m, e) -#define cfs_filp_close(f) kern_file_close(f) -#define cfs_filp_read(f, b, n, p) kern_file_read(f, b, n, p) -#define cfs_filp_write(f, b, n, p) kern_file_write(f, b, n, p) -#define cfs_filp_fsync(f) kern_file_sync(f) - -int ref_file(cfs_file_t *fp); -int rele_file(cfs_file_t *fp); -int file_count(cfs_file_t *fp); -#define cfs_get_file(f) ref_file(f) -#define cfs_put_file(f) rele_file(f) -#define cfs_file_count(f) file_count(f) - -#define CFS_INT_LIMIT(x) (~((x)1 << 
(sizeof(x)*8 - 1))) -#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t) - -typedef struct flock cfs_flock_t; -#define cfs_flock_type(fl) ((fl)->l_type) -#define cfs_flock_set_type(fl, type) do { (fl)->l_type = (type); } while(0) -#define cfs_flock_pid(fl) ((fl)->l_pid) -#define cfs_flock_set_pid(fl, pid) do { (fl)->l_pid = (pid); } while(0) -#define cfs_flock_start(fl) ((fl)->l_start) -#define cfs_flock_set_start(fl, start) do { (fl)->l_start = (start); } while(0) - -static inline loff_t cfs_flock_end(cfs_flock_t *fl) -{ - return (fl->l_len == 0 ? CFS_OFFSET_MAX: (fl->l_start + fl->l_len)); -} - -static inline void cfs_flock_set_end(cfs_flock_t *fl, loff_t end) -{ - if (end == CFS_OFFSET_MAX) - fl->l_len = 0; - else - fl->l_len = end - fl->l_start; -} - -#define ATTR_MODE 0x0001 -#define ATTR_UID 0x0002 -#define ATTR_GID 0x0004 -#define ATTR_SIZE 0x0008 -#define ATTR_ATIME 0x0010 -#define ATTR_MTIME 0x0020 -#define ATTR_CTIME 0x0040 -#define ATTR_ATIME_SET 0x0080 -#define ATTR_MTIME_SET 0x0100 -#define ATTR_FORCE 0x0200 /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG 0x0400 -#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -#define ATTR_CTIME_SET 0x2000 -#define ATTR_BLOCKS 0x4000 - -#define in_group_p(x) (0) - -struct posix_acl_entry { - short e_tag; - unsigned short e_perm; - unsigned int e_id; -}; - -struct posix_acl { - atomic_t a_refcount; - unsigned int a_count; - struct posix_acl_entry a_entries[0]; -}; - -struct posix_acl *posix_acl_alloc(int count, int flags); -static inline struct posix_acl *posix_acl_from_xattr(const void *value, - size_t size) -{ - return posix_acl_alloc(0, 0); -} -static inline void posix_acl_release(struct posix_acl *acl) {}; -static inline int posix_acl_valid(const struct posix_acl *acl) { return 0; } -static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl) -{ - return acl; -} - -/* - * portable UNIX device file 
identification. - */ - -typedef dev_t cfs_rdev_t; - -#else /* !__KERNEL__ */ - -typedef struct file cfs_file_t; - -#endif /* END __KERNEL__ */ - -typedef struct { - void *d; -} cfs_dentry_t; - -#ifndef O_SYNC -#define O_SYNC 0 -#endif -#ifndef O_DIRECTORY -#define O_DIRECTORY 0 -#endif -#ifndef O_LARGEFILE -#define O_LARGEFILE 0 -#endif - -#endif diff --git a/lnet/include/libcfs/darwin/darwin-lock.h b/lnet/include/libcfs/darwin/darwin-lock.h deleted file mode 100644 index f826fef26395d059a7ab7cbfb7eb68302077bae4..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-lock.h +++ /dev/null @@ -1,284 +0,0 @@ -#ifndef __LIBCFS_DARWIN_CFS_LOCK_H__ -#define __LIBCFS_DARWIN_CFS_LOCK_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <mach/sync_policy.h> -#include <mach/task.h> -#include <mach/semaphore.h> -#include <kern/assert.h> -#include <kern/thread.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-sync.h> - -/* - * spin_lock (use Linux kernel's primitives) - * - * - spin_lock_init(x) - * - spin_lock(x) - * - spin_unlock(x) - * - spin_trylock(x) - * - * - spin_lock_irqsave(x, f) - * - spin_unlock_irqrestore(x, f) - */ -struct spin_lock { - struct kspin spin; -}; - -typedef struct spin_lock spinlock_t; - -static inline void spin_lock_init(spinlock_t *lock) -{ - kspin_init(&lock->spin); -} - -static inline void spin_lock(spinlock_t *lock) -{ - kspin_lock(&lock->spin); -} - -static inline void spin_unlock(spinlock_t *lock) -{ - kspin_unlock(&lock->spin); -} - -static inline int spin_trylock(spinlock_t *lock) -{ - return kspin_trylock(&lock->spin); -} - -static inline void spin_lock_done(spinlock_t *lock) -{ - kspin_done(&lock->spin); -} - -#error "does this lock out timer callbacks?" 
-#define spin_lock_bh(x) spin_lock(x) -#define spin_unlock_bh(x) spin_unlock(x) -#define spin_lock_bh_init(x) spin_lock_init(x) - -extern boolean_t ml_set_interrupts_enabled(boolean_t enable); -#define __disable_irq() ml_set_interrupts_enabled(FALSE) -#define __enable_irq(x) (void) ml_set_interrupts_enabled(x) - -#define spin_lock_irqsave(s, f) do{ \ - f = __disable_irq(); \ - spin_lock(s); }while(0) - -#define spin_unlock_irqrestore(s, f) do{ \ - spin_unlock(s); \ - __enable_irq(f);}while(0) - -/* - * Semaphore - * - * - sema_init(x, v) - * - __down(x) - * - __up(x) - */ -struct semaphore { - struct ksem sem; -}; - -static inline void sema_init(struct semaphore *s, int val) -{ - ksem_init(&s->sem, val); -} - -static inline void __down(struct semaphore *s) -{ - ksem_down(&s->sem, 1); -} - -static inline void __up(struct semaphore *s) -{ - ksem_up(&s->sem, 1); -} - -/* - * Mutex: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ - -#define mutex_up(s) __up(s) -#define mutex_down(s) __down(s) - -#define init_mutex(x) sema_init(x, 1) -#define init_mutex_locked(x) sema_init(x, 0) - -/* - * Completion: - * - * - init_completion(c) - * - complete(c) - * - wait_for_completion(c) - */ -struct completion { - /* - * Emulate completion by semaphore for now. - * - * XXX nikita: this is not safe if completion is used to synchronize - * exit from kernel daemon thread and kext unloading. In this case - * some core function (a la complete_and_exit()) is needed. 
- */ - struct ksem sem; -}; - -static inline void init_completion(struct completion *c) -{ - ksem_init(&c->sem, 0); -} - -static inline void complete(struct completion *c) -{ - ksem_up(&c->sem, 1); -} - -static inline void wait_for_completion(struct completion *c) -{ - ksem_down(&c->sem, 1); -} - -/* - * rw_semaphore: - * - * - DECLARE_RWSEM(x) - * - init_rwsem(x) - * - down_read(x) - * - up_read(x) - * - down_write(x) - * - up_write(x) - */ -struct rw_semaphore { - struct krw_sem s; -}; - -static inline void init_rwsem(struct rw_semaphore *s) -{ - krw_sem_init(&s->s); -} - -static inline void fini_rwsem(struct rw_semaphore *s) -{ - krw_sem_done(&s->s); -} - -static inline void down_read(struct rw_semaphore *s) -{ - krw_sem_down_r(&s->s); -} - -static inline int down_read_trylock(struct rw_semaphore *s) -{ - int ret = krw_sem_down_r_try(&s->s); - return ret == 0; -} - -static inline void down_write(struct rw_semaphore *s) -{ - krw_sem_down_w(&s->s); -} - -static inline int down_write_trylock(struct rw_semaphore *s) -{ - int ret = krw_sem_down_w_try(&s->s); - return ret == 0; -} - -static inline void up_read(struct rw_semaphore *s) -{ - krw_sem_up_r(&s->s); -} - -static inline void up_write(struct rw_semaphore *s) -{ - krw_sem_up_w(&s->s); -} - -/* - * read-write lock : Need to be investigated more!! 
- * - * - DECLARE_RWLOCK(l) - * - rwlock_init(x) - * - read_lock(x) - * - read_unlock(x) - * - write_lock(x) - * - write_unlock(x) - */ -typedef struct krw_spin rwlock_t; - -#define rwlock_init(pl) krw_spin_init(pl) - -#define read_lock(l) krw_spin_down_r(l) -#define read_unlock(l) krw_spin_up_r(l) -#define write_lock(l) krw_spin_down_w(l) -#define write_unlock(l) krw_spin_up_w(l) - -#define write_lock_irqsave(l, f) do{ \ - f = __disable_irq(); \ - write_lock(l); }while(0) - -#define write_unlock_irqrestore(l, f) do{ \ - write_unlock(l); \ - __enable_irq(f);}while(0) - -#define read_lock_irqsave(l, f) do{ \ - f = __disable_irq(); \ - read_lock(l); }while(0) - -#define read_unlock_irqrestore(l, f) do{ \ - read_unlock(l); \ - __enable_irq(f);}while(0) -/* - * Funnel: - * - * Safe funnel in/out - */ -#ifdef __DARWIN8__ - -#define CFS_DECL_FUNNEL_DATA -#define CFS_DECL_CONE_DATA DECLARE_FUNNEL_DATA -#define CFS_DECL_NET_DATA DECLARE_FUNNEL_DATA -#define CFS_CONE_IN do {} while(0) -#define CFS_CONE_EX do {} while(0) - -#define CFS_NET_IN do {} while(0) -#define CFS_NET_EX do {} while(0) - -#else - -#define CFS_DECL_FUNNEL_DATA \ - boolean_t __funnel_state = FALSE; \ - funnel_t *__funnel -#define CFS_DECL_CONE_DATA CFS_DECL_FUNNEL_DATA -#define CFS_DECL_NET_DATA CFS_DECL_FUNNEL_DATA - -void lustre_cone_in(boolean_t *state, funnel_t **cone); -void lustre_cone_ex(boolean_t state, funnel_t *cone); - -#define CFS_CONE_IN lustre_cone_in(&__funnel_state, &__funnel) -#define CFS_CONE_EX lustre_cone_ex(__funnel_state, __funnel) - -void lustre_net_in(boolean_t *state, funnel_t **cone); -void lustre_net_ex(boolean_t state, funnel_t *cone); - -#define CFS_NET_IN lustre_net_in(&__funnel_state, &__funnel) -#define CFS_NET_EX lustre_net_ex(__funnel_state, __funnel) - -#endif - -#else -#include <libcfs/user-lock.h> -#endif /* __KERNEL__ */ - -/* __XNU_CFS_LOCK_H */ -#endif diff --git a/lnet/include/libcfs/darwin/darwin-mem.h b/lnet/include/libcfs/darwin/darwin-mem.h deleted file mode 
100644 index 5ffcd4e549682c52c448ca993fc7ee6472baeec1..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-mem.h +++ /dev/null @@ -1,232 +0,0 @@ -#ifndef __LIBCFS_DARWIN_CFS_MEM_H__ -#define __LIBCFS_DARWIN_CFS_MEM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ - -#include <sys/types.h> -#include <sys/systm.h> - -#include <sys/vm.h> -#include <sys/kernel.h> -#include <sys/ubc.h> -#include <sys/uio.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/lockf.h> - -#include <mach/mach_types.h> -#include <mach/vm_types.h> -#include <vm/pmap.h> -#include <vm/vm_kern.h> -#include <mach/machine/vm_param.h> -#include <kern/thread_call.h> -#include <sys/param.h> -#include <sys/vm.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-sync.h> -#include <libcfs/darwin/darwin-lock.h> -#include <libcfs/list.h> - -/* - * Basic xnu_page struct, should be binary compatibility with - * all page types in xnu (we have only xnu_raw_page, xll_page now) - */ - -/* Variable sized pages are not supported */ - -#ifdef PAGE_SHIFT -#define CFS_PAGE_SHIFT PAGE_SHIFT -#else -#define CFS_PAGE_SHIFT 12 -#endif - -#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT) - -#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE - 1)) - -enum { - XNU_PAGE_RAW, - XNU_PAGE_XLL, - XNU_PAGE_NTYPES -}; - -typedef __u32 page_off_t; - -/* - * For XNU we have our own page cache built on top of underlying BSD/MACH - * infrastructure. In particular, we have two disjoint types of pages: - * - * - "raw" pages (XNU_PAGE_RAW): these are just buffers mapped into KVM, - * based on UPLs, and - * - * - "xll" pages (XNU_PAGE_XLL): these are used by file system to cache - * file data, owned by file system objects, hashed, lrued, etc. 
- * - * cfs_page_t has to cover both of them, because core Lustre code is based on - * the Linux assumption that page is _both_ memory buffer and file system - * caching entity. - * - * To achieve this, all types of pages supported on XNU has to start from - * common header that contains only "page type". Common cfs_page_t operations - * dispatch through operation vector based on page type. - * - */ -typedef struct xnu_page { - int type; -} cfs_page_t; - -struct xnu_page_ops { - void *(*page_map) (cfs_page_t *); - void (*page_unmap) (cfs_page_t *); - void *(*page_address) (cfs_page_t *); -}; - -void xnu_page_ops_register(int type, struct xnu_page_ops *ops); -void xnu_page_ops_unregister(int type); - -/* - * raw page, no cache object, just like buffer - */ -struct xnu_raw_page { - struct xnu_page header; - void *virtual; - atomic_t count; - struct list_head link; -}; - -/* - * Public interface to lustre - * - * - cfs_alloc_page(f) - * - cfs_free_page(p) - * - cfs_kmap(p) - * - cfs_kunmap(p) - * - cfs_page_address(p) - */ - -/* - * Of all functions above only cfs_kmap(), cfs_kunmap(), and - * cfs_page_address() can be called on file system pages. The rest is for raw - * pages only. - */ - -cfs_page_t *cfs_alloc_page(u_int32_t flags); -void cfs_free_page(cfs_page_t *page); -void cfs_get_page(cfs_page_t *page); -int cfs_put_page_testzero(cfs_page_t *page); -int cfs_page_count(cfs_page_t *page); -#define cfs_page_index(pg) (0) - -void *cfs_page_address(cfs_page_t *pg); -void *cfs_kmap(cfs_page_t *pg); -void cfs_kunmap(cfs_page_t *pg); - -/* - * Memory allocator - */ - -void *cfs_alloc(size_t nr_bytes, u_int32_t flags); -void cfs_free(void *addr); - -void *cfs_alloc_large(size_t nr_bytes); -void cfs_free_large(void *addr); - -extern int get_preemption_level(void); - -#define CFS_ALLOC_ATOMIC_TRY \ - (get_preemption_level() != 0 ? 
CFS_ALLOC_ATOMIC : 0) - -/* - * Slab: - * - * No slab in OSX, use zone allocator to simulate slab - */ -#define SLAB_HWCACHE_ALIGN 0 - -#ifdef __DARWIN8__ -/* - * In Darwin8, we cannot use zalloc_noblock(not exported by kernel), - * also, direct using of zone allocator is not recommended. - */ -#define CFS_INDIVIDUAL_ZONE (0) - -#if !CFS_INDIVIDUAL_ZONE -#include <libkern/OSMalloc.h> -typedef OSMallocTag mem_cache_t; -#else -typedef void* zone_t; -typedef zone_t mem_cache_t; -#endif - -#else /* !__DARWIN8__ */ - -#define CFS_INDIVIDUAL_ZONE (1) - -typedef zone_t mem_cache_t; - -#endif /* !__DARWIN8__ */ - -#define MC_NAME_MAX_LEN 64 - -typedef struct cfs_mem_cache { - int mc_size; - mem_cache_t mc_cache; - struct list_head mc_link; - char mc_name [MC_NAME_MAX_LEN]; -} cfs_mem_cache_t; - -#define KMEM_CACHE_MAX_COUNT 64 -#define KMEM_MAX_ZONE 8192 - -cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long); -int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); -void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); -void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); - -/* - * Misc - */ -/* XXX Liang: num_physpages... 
fix me */ -#define num_physpages (64 * 1024) - -#define CFS_DECL_MMSPACE -#define CFS_MMSPACE_OPEN do {} while(0) -#define CFS_MMSPACE_CLOSE do {} while(0) - -#define copy_from_user(kaddr, uaddr, size) copyin(CAST_USER_ADDR_T(uaddr), (caddr_t)kaddr, size) -#define copy_to_user(uaddr, kaddr, size) copyout((caddr_t)kaddr, CAST_USER_ADDR_T(uaddr), size) - -#if 0 -static inline int strncpy_from_user(char *kaddr, char *uaddr, int size) -{ - size_t count; - return copyinstr((const user_addr_t)uaddr, (void *)kaddr, size, &count); -} -#endif - -#if defined (__ppc__) -#define mb() __asm__ __volatile__ ("sync" : : : "memory") -#define rmb() __asm__ __volatile__ ("sync" : : : "memory") -#define wmb() __asm__ __volatile__ ("eieio" : : : "memory") -#elif defined (__i386__) -#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") -#define rmb() mb() -#define wmb() __asm__ __volatile__ ("": : :"memory") -#else -#error architecture not supported -#endif - -#else /* !__KERNEL__ */ - -#define CFS_CACHE_SHIFT 12 -#define PAGE_CACHE_SIZE (1 << CFS_CACHE_SHIFT) -#include <libcfs/user-prim.h> - -#endif /* __KERNEL__ */ - -#endif /* __XNU_CFS_MEM_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-prim.h b/lnet/include/libcfs/darwin/darwin-prim.h deleted file mode 100644 index 00fbeed0c613b6775e35bb2beb49a2841048ea82..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-prim.h +++ /dev/null @@ -1,527 +0,0 @@ -#ifndef __LIBCFS_DARWIN_CFS_PRIM_H__ -#define __LIBCFS_DARWIN_CFS_PRIM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <sys/types.h> -#include <sys/systm.h> - -#ifndef __DARWIN8__ -# ifndef __APPLE_API_PRIVATE -# define __APPLE_API_PRIVATE -# include <sys/user.h> -# undef __APPLE_API_PRIVATE -# else -# include <sys/user.h> -# endif -# include <mach/mach_traps.h> -# include <mach/thread_switch.h> -# include <machine/cpu_number.h> -#endif /* !__DARWIN8__ */ - -#include <sys/kernel.h> - -#include <mach/thread_act.h> -#include <mach/mach_types.h> -#include <mach/time_value.h> -#include <kern/sched_prim.h> -#include <vm/pmap.h> -#include <vm/vm_kern.h> -#include <mach/machine/vm_param.h> -#include <machine/machine_routines.h> -#include <kern/clock.h> -#include <kern/thread_call.h> -#include <sys/param.h> -#include <sys/vm.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-utils.h> -#include <libcfs/darwin/darwin-lock.h> - -/* - * Symbol functions for libcfs - * - * OSX has no facility for use to register symbol. - * So we have to implement it. 
- */ -#define CFS_SYMBOL_LEN 64 - -struct cfs_symbol { - char name[CFS_SYMBOL_LEN]; - void *value; - int ref; - struct list_head sym_list; -}; - -extern kern_return_t cfs_symbol_register(const char *, const void *); -extern kern_return_t cfs_symbol_unregister(const char *); -extern void * cfs_symbol_get(const char *); -extern kern_return_t cfs_symbol_put(const char *); - -/* - * sysctl typedef - * - * User can register/unregister a list of sysctl_oids - * sysctl_oid is data struct of osx's sysctl-entry - */ -#define CONFIG_SYSCTL 1 - -typedef struct sysctl_oid * cfs_sysctl_table_t; -typedef cfs_sysctl_table_t cfs_sysctl_table_header_t; -cfs_sysctl_table_header_t *cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg); -void cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table); - -/* - * Proc file system APIs, no /proc fs support in OSX - */ -typedef struct cfs_proc_dir_entry { - void *data; -} cfs_proc_dir_entry_t; - -cfs_proc_dir_entry_t * cfs_create_proc_entry(char *name, int mod, - cfs_proc_dir_entry_t *parent); -void cfs_free_proc_entry(cfs_proc_dir_entry_t *de); -void cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry); - -typedef int (cfs_read_proc_t)(char *page, char **start, off_t off, - int count, int *eof, void *data); -typedef int (cfs_write_proc_t)(struct file *file, const char *buffer, - unsigned long count, void *data); - -/* - * cfs pseudo device - * - * cfs_psdev_t - * cfs_psdev_register: - * cfs_psdev_deregister: - */ -typedef struct { - int index; - void *handle; - const char *name; - struct cdevsw *devsw; - void *private; -} cfs_psdev_t; - -extern kern_return_t cfs_psdev_register(cfs_psdev_t *); -extern kern_return_t cfs_psdev_deregister(cfs_psdev_t *); - -/* - * Task struct and ... 
- * - * Using BSD current_proc in Darwin - */ -extern boolean_t assert_wait_possible(void); -extern void *get_bsdtask_info(task_t); - -#ifdef __DARWIN8__ - -typedef struct {} cfs_task_t; -#define cfs_current() ((cfs_task_t *)current_thread()) -#else /* !__DARWIN8__ */ - -typedef struct uthread cfs_task_t; - -#define current_uthread() ((struct uthread *)get_bsdthread_info(current_act())) -#define cfs_current() current_uthread() - -#endif /* !__DARWIN8__ */ - -#define cfs_task_lock(t) do {;} while (0) -#define cfs_task_unlock(t) do {;} while (0) - -#define set_current_state(s) do {;} while (0) - -#define CFS_DECL_JOURNAL_DATA -#define CFS_PUSH_JOURNAL do {;} while(0) -#define CFS_POP_JOURNAL do {;} while(0) - -#define THREAD_NAME(comm, fmt, a...) -/* - * Kernel thread: - * - * OSX kernel thread can not be created with args, - * so we have to implement new APIs to create thread with args - */ - -typedef int (*cfs_thread_t)(void *); - -extern task_t kernel_task; - -/* - * cloning flags, no use in OSX, just copy them from Linux - */ -#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ -#define CLONE_VM 0x00000100 /* set if VM shared between processes */ -#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ -#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ -#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ -#define CLONE_PID 0x00001000 /* set if pid shared */ -#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ -#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ -#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ -#define CLONE_THREAD 0x00010000 /* Same thread group? */ -#define CLONE_NEWNS 0x00020000 /* New namespace group? 
*/ - -#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) - -extern int cfs_kernel_thread(cfs_thread_t func, void *arg, int flag); - - -/* - * Wait Queue implementation - * - * Like wait_queue in Linux - */ -typedef struct cfs_waitq { - struct ksleep_chan wq_ksleep_chan; -} cfs_waitq_t; - -typedef struct cfs_waitlink { - struct cfs_waitq *wl_waitq; - struct ksleep_link wl_ksleep_link; -} cfs_waitlink_t; - -typedef int cfs_task_state_t; - -#define CFS_TASK_INTERRUPTIBLE THREAD_ABORTSAFE -#define CFS_TASK_UNINT THREAD_UNINT - -void cfs_waitq_init(struct cfs_waitq *waitq); -void cfs_waitlink_init(struct cfs_waitlink *link); - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link); -void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); -int cfs_waitq_active(struct cfs_waitq *waitq); - -void cfs_waitq_signal(struct cfs_waitq *waitq); -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); -void cfs_waitq_broadcast(struct cfs_waitq *waitq); - -void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state); -cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, - cfs_task_state_t state, - cfs_duration_t timeout); - -/* - * Thread schedule APIs. 
- */ -#define MAX_SCHEDULE_TIMEOUT ((long)(~0UL>>12)) -extern void thread_set_timer_deadline(uint64_t deadline); -extern void thread_cancel_timer(void); - -static inline int cfs_schedule_timeout(int state, int64_t timeout) -{ - int result; - -#ifdef __DARWIN8__ - result = assert_wait((event_t)current_thread(), state); -#else - result = assert_wait((event_t)current_uthread(), state); -#endif - if (timeout > 0) { - uint64_t expire; - nanoseconds_to_absolutetime(timeout, &expire); - clock_absolutetime_interval_to_deadline(expire, &expire); - thread_set_timer_deadline(expire); - } - if (result == THREAD_WAITING) - result = thread_block(THREAD_CONTINUE_NULL); - if (timeout > 0) - thread_cancel_timer(); - if (result == THREAD_TIMED_OUT) - result = 0; - else - result = 1; - return result; -} - -#define cfs_schedule() cfs_schedule_timeout(CFS_TASK_UNINT, CFS_TICK) -#define cfs_pause(tick) cfs_schedule_timeout(CFS_TASK_UNINT, tick) - -#define __wait_event(wq, condition) \ -do { \ - struct cfs_waitlink __wait; \ - \ - cfs_waitlink_init(&__wait); \ - for (;;) { \ - cfs_waitq_add(&wq, &__wait); \ - if (condition) \ - break; \ - cfs_waitq_wait(&__wait, CFS_TASK_UNINT); \ - cfs_waitq_del(&wq, &__wait); \ - } \ - cfs_waitq_del(&wq, &__wait); \ -} while (0) - -#define wait_event(wq, condition) \ -do { \ - if (condition) \ - break; \ - __wait_event(wq, condition); \ -} while (0) - -#define __wait_event_interruptible(wq, condition, ex, ret) \ -do { \ - struct cfs_waitlink __wait; \ - \ - cfs_waitlink_init(&__wait); \ - for (;;) { \ - if (ex == 0) \ - cfs_waitq_add(&wq, &__wait); \ - else \ - cfs_waitq_add_exclusive(&wq, &__wait); \ - if (condition) \ - break; \ - if (!cfs_signal_pending()) { \ - cfs_waitq_wait(&__wait, \ - CFS_TASK_INTERRUPTIBLE); \ - cfs_waitq_del(&wq, &__wait); \ - continue; \ - } \ - ret = -ERESTARTSYS; \ - break; \ - } \ - cfs_waitq_del(&wq, &__wait); \ -} while (0) - -#define wait_event_interruptible(wq, condition) \ -({ \ - int __ret = 0; \ - if (!condition) \ 
- __wait_event_interruptible(wq, condition, \ - 0, __ret); \ - __ret; \ -}) - -#define wait_event_interruptible_exclusive(wq, condition) \ -({ \ - int __ret = 0; \ - if (!condition) \ - __wait_event_interruptible(wq, condition, \ - 1, __ret); \ - __ret; \ -}) - -#ifndef __DARWIN8__ -extern void wakeup_one __P((void * chan)); -#endif -/* only used in tests */ -#define wake_up_process(p) \ - do { \ - wakeup_one((caddr_t)p); \ - } while (0) - -/* used in couple of places */ -static inline void sleep_on(cfs_waitq_t *waitq) -{ - cfs_waitlink_t link; - - cfs_waitlink_init(&link); - cfs_waitq_add(waitq, &link); - cfs_waitq_wait(&link, CFS_TASK_UNINT); - cfs_waitq_del(waitq, &link); -} - -/* - * Signal - */ -typedef sigset_t cfs_sigset_t; - -#define SIGNAL_MASK_ASSERT() -/* - * Timer - */ -typedef struct cfs_timer { - struct ktimer t; -} cfs_timer_t; - -#define cfs_init_timer(t) do {} while(0) -void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg); -void cfs_timer_done(struct cfs_timer *t); -void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline); -void cfs_timer_disarm(struct cfs_timer *t); -int cfs_timer_is_armed(struct cfs_timer *t); - -cfs_time_t cfs_timer_deadline(struct cfs_timer *t); - -/* - * Ioctl - * We don't need to copy out everything in osx - */ -#define cfs_ioctl_data_out(a, d, l) \ - ({ \ - int __size; \ - int __rc = 0; \ - assert((l) >= sizeof(*d)); \ - __size = (l) - sizeof(*d); \ - if (__size > 0) \ - __rc = copy_to_user((void *)a + __size, \ - (void *)d + __size, \ - __size); \ - __rc; \ - }) - -/* - * CPU - */ -/* Run in PowerG5 who is PPC64 */ -#define SMP_CACHE_BYTES 128 -#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) -#define NR_CPUS 2 - -/* - * XXX Liang: patch xnu and export current_processor()? 
- * - * #define smp_processor_id() current_processor() - */ -#define smp_processor_id() 0 -/* XXX smp_call_function is not supported in xnu */ -#define smp_call_function(f, a, n, w) do {} while(0) -int cfs_online_cpus(void); -#define smp_num_cpus cfs_online_cpus() - -/* - * Misc - */ -extern int is_suser(void); - -#ifndef likely -#define likely(exp) (exp) -#endif -#ifndef unlikely -#define unlikely(exp) (exp) -#endif - -#define lock_kernel() do {} while(0) -#define unlock_kernel() do {} while(0) - -#define CAP_SYS_BOOT 0 -#define CAP_SYS_ADMIN 1 -#define capable(a) ((a) == CAP_SYS_BOOT ? is_suser(): is_suser1()) - -#define USERMODEHELPER(path, argv, envp) (0) - -#define cfs_module(name, version, init, fini) \ -extern kern_return_t _start(kmod_info_t *ki, void *data); \ -extern kern_return_t _stop(kmod_info_t *ki, void *data); \ -__private_extern__ kern_return_t name##_start(kmod_info_t *ki, void *data); \ -__private_extern__ kern_return_t name##_stop(kmod_info_t *ki, void *data); \ - \ -kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \ - { "com.clusterfs.lustre." #name }, { version }, \ - -1, 0, 0, 0, 0, name##_start, name##_stop }; \ - \ -__private_extern__ kmod_start_func_t *_realmain = name##_start; \ -__private_extern__ kmod_stop_func_t *_antimain = name##_stop; \ -__private_extern__ int _kext_apple_cc = __APPLE_CC__ ; \ - \ -kern_return_t name##_start(kmod_info_t *ki, void *d) \ -{ \ - return init(); \ -} \ - \ -kern_return_t name##_stop(kmod_info_t *ki, void *d) \ -{ \ - fini(); \ - return KERN_SUCCESS; \ -} \ - \ -/* \ - * to allow semicolon after cfs_module(...) 
\ - */ \ -struct __dummy_ ## name ## _struct {} - -#define inter_module_get(n) cfs_symbol_get(n) -#define inter_module_put(n) cfs_symbol_put(n) - -static inline int request_module(char *name) -{ - return (-EINVAL); -} - -#ifndef __exit -#define __exit -#endif -#ifndef __init -#define __init -#endif - -#define EXPORT_SYMBOL(s) -#define MODULE_AUTHOR(s) -#define MODULE_DESCRIPTION(s) -#define MODULE_LICENSE(s) -#define MODULE_PARM(a, b) -#define MODULE_PARM_DESC(a, b) - -#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) -#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0) - -#define NR_IRQS 512 -#define in_interrupt() ml_at_interrupt_context() - -#define KERN_EMERG "<0>" /* system is unusable */ -#define KERN_ALERT "<1>" /* action must be taken immediately */ -#define KERN_CRIT "<2>" /* critical conditions */ -#define KERN_ERR "<3>" /* error conditions */ -#define KERN_WARNING "<4>" /* warning conditions */ -#define KERN_NOTICE "<5>" /* normal but significant condition */ -#define KERN_INFO "<6>" /* informational */ -#define KERN_DEBUG "<7>" /* debug-level messages */ - -static inline long PTR_ERR(const void *ptr) -{ - return (long) ptr; -} - -#define ERR_PTR(err) ((void *)err) -#define IS_ERR(p) ((unsigned long)(p) + 1000 < 1000) - -#else /* !__KERNEL__ */ - -typedef struct cfs_proc_dir_entry { - void *data; -} cfs_proc_dir_entry_t; - -#include <libcfs/user-prim.h> -#define __WORDSIZE 32 - -#endif /* END __KERNEL__ */ -/* - * Error number - */ -#ifndef EPROTO -#define EPROTO EPROTOTYPE -#endif -#ifndef EBADR -#define EBADR EBADRPC -#endif -#ifndef ERESTARTSYS -#define ERESTARTSYS 512 -#endif -#ifndef EDEADLOCK -#define EDEADLOCK EDEADLK -#endif -#ifndef ECOMM -#define ECOMM EINVAL -#endif -#ifndef ENODATA -#define ENODATA EINVAL -#endif -#ifndef ENOTSUPP -#define ENOTSUPP EINVAL -#endif - -#if BYTE_ORDER == BIG_ENDIAN -# define __BIG_ENDIAN -#else -# define __LITTLE_ENDIAN -#endif - -#endif /* __LIBCFS_DARWIN_CFS_PRIM_H__ */ diff --git 
a/lnet/include/libcfs/darwin/darwin-sync.h b/lnet/include/libcfs/darwin/darwin-sync.h deleted file mode 100644 index 5a3fabdca45765fc0a7cfba54f51565804924ce1..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-sync.h +++ /dev/null @@ -1,332 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Implementation of standard libcfs synchronization primitives for XNU - * kernel. - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -/* - * xnu_sync.h - * - * Created by nikita on Sun Jul 18 2004. - * - * Prototypes of XNU synchronization primitives. - */ - -#ifndef __LIBCFS_DARWIN_XNU_SYNC_H__ -#define __LIBCFS_DARWIN_XNU_SYNC_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#define XNU_SYNC_DEBUG (1) - -#if XNU_SYNC_DEBUG -#define ON_SYNC_DEBUG(e) e -#else -#define ON_SYNC_DEBUG(e) -#endif - -enum { - /* "egrep -i '^(o?x)?[abcdeflo]*$' /usr/dict/words" is your friend */ - KMUT_MAGIC = 0x0bac0cab, /* [a, [b, c]] = b (a, c) - c (a, b) */ - KSEM_MAGIC = 0x1abe11ed, - KCOND_MAGIC = 0xb01dface, - KRW_MAGIC = 0xdabb1edd, - KSPIN_MAGIC = 0xca11ab1e, - KRW_SPIN_MAGIC = 0xbabeface, - KSLEEP_CHAN_MAGIC = 0x0debac1e, - KSLEEP_LINK_MAGIC = 0xacc01ade, - KTIMER_MAGIC = 0xbefadd1e -}; - -/* ------------------------- spin lock ------------------------- */ - -/* - * XXX nikita: don't use NCPUS it's hardcoded to (1) in cpus.h - */ -#define SMP (1) - -#include <libcfs/list.h> - -#ifdef __DARWIN8__ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <kern/locks.h> - -/* - * hw_lock is not available in Darwin8 (hw_lock_* are not exported at all), - * so use lck_spin_t. we can hack out lck_spin_t easily, it's the only - * hacking in Darwin8.x. We did so because it'll take a lot of time to - * add lock_done for all locks, maybe it should be done in the future. - * If lock_done for all locks were added, we can: - * - * typedef lck_spin_t *xnu_spin_t; - */ -#if defined (__ppc__) -typedef struct { - unsigned int opaque[3]; -} xnu_spin_t; -#elif defined (__i386__) -typedef struct { - unsigned int opaque[10]; -} xnu_spin_t; -#endif - -/* - * wait_queue is not available in Darwin8 (wait_queue_* are not exported), - * use assert_wait/wakeup/wake_one (wait_queue in kernel hash). 
- */ -typedef void * xnu_wait_queue_t; - -/* DARWIN8 */ -#else - -#include <mach/mach_types.h> -#include <sys/types.h> -#include <kern/simple_lock.h> - -typedef hw_lock_data_t xnu_spin_t; -typedef struct wait_queue xnu_wait_queue_t; - -/* DARWIN8 */ -#endif - -struct kspin { -#if SMP - xnu_spin_t lock; -#endif -#if XNU_SYNC_DEBUG - unsigned magic; - thread_t owner; -#endif -}; - -void kspin_init(struct kspin *spin); -void kspin_done(struct kspin *spin); -void kspin_lock(struct kspin *spin); -void kspin_unlock(struct kspin *spin); -int kspin_trylock(struct kspin *spin); - -#if XNU_SYNC_DEBUG -/* - * two functions below are for use in assertions - */ -/* true, iff spin-lock is locked by the current thread */ -int kspin_islocked(struct kspin *spin); -/* true, iff spin-lock is not locked by the current thread */ -int kspin_isnotlocked(struct kspin *spin); -#else -#define kspin_islocked(s) (1) -#define kspin_isnotlocked(s) (1) -#endif - -/* ------------------------- rw spinlock ----------------------- */ -struct krw_spin { - struct kspin guard; - int count; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void krw_spin_init(struct krw_spin *sem); -void krw_spin_done(struct krw_spin *sem); -void krw_spin_down_r(struct krw_spin *sem); -void krw_spin_down_w(struct krw_spin *sem); -void krw_spin_up_r(struct krw_spin *sem); -void krw_spin_up_w(struct krw_spin *sem); - -/* ------------------------- semaphore ------------------------- */ - -struct ksem { - struct kspin guard; - xnu_wait_queue_t q; - int value; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void ksem_init(struct ksem *sem, int value); -void ksem_done(struct ksem *sem); -int ksem_up (struct ksem *sem, int value); -void ksem_down(struct ksem *sem, int value); -int ksem_trydown(struct ksem *sem, int value); - -/* ------------------------- mutex ------------------------- */ - -struct kmut { - struct ksem s; -#if XNU_SYNC_DEBUG - unsigned magic; - thread_t owner; -#endif -}; - -void kmut_init(struct kmut 
*mut); -void kmut_done(struct kmut *mut); - -void kmut_lock (struct kmut *mut); -void kmut_unlock (struct kmut *mut); -int kmut_trylock(struct kmut *mut); - -#if XNU_SYNC_DEBUG -/* - * two functions below are for use in assertions - */ -/* true, iff mutex is locked by the current thread */ -int kmut_islocked(struct kmut *mut); -/* true, iff mutex is not locked by the current thread */ -int kmut_isnotlocked(struct kmut *mut); -#else -#define kmut_islocked(m) (1) -#define kmut_isnotlocked(m) (1) -#endif - -/* ------------------------- condition variable ------------------------- */ - -struct kcond_link { - struct kcond_link *next; - struct ksem sem; -}; - -struct kcond { - struct kspin guard; - struct kcond_link *waiters; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void kcond_init(struct kcond *cond); -void kcond_done(struct kcond *cond); -void kcond_wait(struct kcond *cond, struct kspin *lock); -void kcond_signal(struct kcond *cond); -void kcond_broadcast(struct kcond *cond); - -void kcond_wait_guard(struct kcond *cond); -void kcond_signal_guard(struct kcond *cond); -void kcond_broadcast_guard(struct kcond *cond); - -/* ------------------------- read-write semaphore ------------------------- */ - -struct krw_sem { - int count; - struct kcond cond; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void krw_sem_init(struct krw_sem *sem); -void krw_sem_done(struct krw_sem *sem); -void krw_sem_down_r(struct krw_sem *sem); -int krw_sem_down_r_try(struct krw_sem *sem); -void krw_sem_down_w(struct krw_sem *sem); -int krw_sem_down_w_try(struct krw_sem *sem); -void krw_sem_up_r(struct krw_sem *sem); -void krw_sem_up_w(struct krw_sem *sem); - -/* ------------------------- sleep-channel ------------------------- */ - -struct ksleep_chan { - struct kspin guard; - struct list_head waiters; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -#define KSLEEP_CHAN_INITIALIZER {{{0}}} - -struct ksleep_link { - int flags; - event_t event; - int hits; - struct ksleep_chan 
*forward; - struct list_head linkage; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -enum { - KSLEEP_EXCLUSIVE = 1 -}; - -void ksleep_chan_init(struct ksleep_chan *chan); -void ksleep_chan_done(struct ksleep_chan *chan); - -void ksleep_link_init(struct ksleep_link *link); -void ksleep_link_done(struct ksleep_link *link); - -void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link); -void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link); - -void ksleep_wait(struct ksleep_chan *chan, int state); -int64_t ksleep_timedwait(struct ksleep_chan *chan, int state, uint64_t timeout); - -void ksleep_wake(struct ksleep_chan *chan); -void ksleep_wake_all(struct ksleep_chan *chan); -void ksleep_wake_nr(struct ksleep_chan *chan, int nr); - -#define KSLEEP_LINK_DECLARE(name) \ -{ \ - .flags = 0, \ - .event = 0, \ - .hits = 0, \ - .linkage = CFS_LIST_HEAD(name.linkage), \ - .magic = KSLEEP_LINK_MAGIC \ -} - -/* ------------------------- timer ------------------------- */ - -struct ktimer { - struct kspin guard; - void (*func)(void *); - void *arg; - u_int64_t deadline; /* timer deadline in absolute nanoseconds */ - int armed; -#if XNU_SYNC_DEBUG - unsigned magic; -#endif -}; - -void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg); -void ktimer_done(struct ktimer *t); -void ktimer_arm(struct ktimer *t, u_int64_t deadline); -void ktimer_disarm(struct ktimer *t); -int ktimer_is_armed(struct ktimer *t); - -u_int64_t ktimer_deadline(struct ktimer *t); - -/* __XNU_SYNC_H__ */ -#endif - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/darwin/darwin-tcpip.h b/lnet/include/libcfs/darwin/darwin-tcpip.h deleted file mode 100644 index 1a73891cf94ade08489040dc45a9e4b70bbcd4fd..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-tcpip.h +++ /dev/null @@ -1,90 +0,0 @@ -/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_DARWIN_TCPIP_H__ -#define __LIBCFS_DARWIN_TCPIP_H__ - -#ifdef __KERNEL__ -#include <sys/socket.h> - -#ifdef __DARWIN8__ - -struct socket; - -typedef void (*so_upcall)(socket_t sock, void* arg, int waitf); - -#define CFS_SOCK_UPCALL 0x1 -#define CFS_SOCK_DOWN 0x2 - -#define CFS_SOCK_MAGIC 0xbabeface - -typedef struct cfs_socket { - socket_t s_so; - int s_magic; - int s_flags; - so_upcall s_upcall; - void *s_upcallarg; -} cfs_socket_t; - - -/* cfs_socket_t to bsd socket */ -#define C2B_SOCK(s) ((s)->s_so) - -static inline int get_sock_intopt(socket_t so, int opt) -{ - int val, len; - int rc; - - /* - * sock_getsockopt will take a lock(mutex) for socket, - * so it can be blocked. So be careful while using - * them. 
- */ - len = sizeof(val); - rc = sock_getsockopt(so, SOL_SOCKET, opt, &val, &len); - assert(rc == 0); - return val; -} - -#define SOCK_ERROR(s) get_sock_intopt(C2B_SOCK(s), SO_ERROR) -/* #define SOCK_WMEM_QUEUED(s) (0) */ -#define SOCK_WMEM_QUEUED(s) get_sock_intopt(C2B_SOCK(s), SO_NWRITE) -/* XXX Liang: no reliable way to get it in Darwin8.x */ -#define SOCK_TEST_NOSPACE(s) (0) - -void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg); -void libcfs_sock_reset_cb(cfs_socket_t *sock); - -#else /* !__DARWIN8__ */ - -#define SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc) -#define SOCK_ERROR(so) ((so)->so_error) - -#define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat) - -#endif /* !__DARWIN8__ */ - -#endif /* __KERNEL END */ - -#endif /* __XNU_CFS_TYPES_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-time.h b/lnet/include/libcfs/darwin/darwin-time.h deleted file mode 100644 index 43ad274630814b9770f4083f409f14daedec90e3..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-time.h +++ /dev/null @@ -1,248 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. 
- * - * Implementation of portable time API for XNU kernel - * - */ - -#ifndef __LIBCFS_DARWIN_DARWIN_TIME_H__ -#define __LIBCFS_DARWIN_DARWIN_TIME_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Portable time API */ - -/* - * Platform provides three opaque data-types: - * - * cfs_time_t represents point in time. This is internal kernel - * time rather than "wall clock". This time bears no - * relation to gettimeofday(). - * - * cfs_duration_t represents time interval with resolution of internal - * platform clock - * - * cfs_fs_time_t represents instance in world-visible time. This is - * used in file-system time-stamps - * - * cfs_time_t cfs_time_current(void); - * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); - * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); - * int cfs_time_before (cfs_time_t, cfs_time_t); - * int cfs_time_beforeq(cfs_time_t, cfs_time_t); - * - * cfs_duration_t cfs_duration_build(int64_t); - * - * time_t cfs_duration_sec (cfs_duration_t); - * void cfs_duration_usec(cfs_duration_t, struct timeval *); - * void cfs_duration_nsec(cfs_duration_t, struct timespec *); - * - * void cfs_fs_time_current(cfs_fs_time_t *); - * time_t cfs_fs_time_sec (cfs_fs_time_t *); - * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); - * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); - * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); - * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); - * - * CFS_TIME_FORMAT - * CFS_DURATION_FORMAT - * - */ - -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION 1000000 - -#ifdef __KERNEL__ -#include <sys/types.h> -#include <sys/systm.h> - -#include <sys/kernel.h> - -#include <mach/mach_types.h> -#include <mach/time_value.h> -#include <kern/clock.h> -#include <sys/param.h> - -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-utils.h> -#include 
<libcfs/darwin/darwin-lock.h> - -/* - * There are three way to measure time in OS X: - * 1. nanoseconds - * 2. absolute time (abstime unit equal to the length of one bus cycle), - * schedule of thread/timer are counted by absolute time, but abstime - * in different mac can be different also, so we wouldn't use it. - * 3. clock interval (1sec = 100hz). But clock interval only taken by KPI - * like tsleep(). - * - * We use nanoseconds (uptime, not calendar time) - * - * clock_get_uptime() :get absolute time since bootup. - * nanouptime() :get nanoseconds since bootup - * microuptime() :get microseonds since bootup - * nanotime() :get nanoseconds since epoch - * microtime() :get microseconds since epoch - */ -typedef u_int64_t cfs_time_t; /* nanoseconds */ -typedef int64_t cfs_duration_t; - -#define CFS_TIME_T "%llu" -#define CFS_DURATION_T "%lld" - -typedef struct timeval cfs_fs_time_t; - -static inline cfs_time_t cfs_time_current(void) -{ - struct timespec instant; - - nanouptime(&instant); - return ((u_int64_t)instant.tv_sec) * NSEC_PER_SEC + instant.tv_nsec; -} - -static inline time_t cfs_time_current_sec(void) -{ - struct timespec instant; - - nanouptime(&instant); - return instant.tv_sec; -} - -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return t + d; -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return t1 - t2; -} - -static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return (int64_t)t1 - (int64_t)t2 < 0; -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return (int64_t)t1 - (int64_t)t2 <= 0; -} - -static inline void cfs_fs_time_current(cfs_fs_time_t *t) -{ - microtime((struct timeval *)t); -} - -static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) -{ - return t->tv_sec; -} - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - *v = *t; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - 
s->tv_sec = t->tv_sec; - s->tv_nsec = t->tv_usec * NSEC_PER_USEC; -} - -static inline cfs_duration_t cfs_time_seconds(int seconds) -{ - return (NSEC_PER_SEC * (int64_t)seconds); -} - -/* - * internal helper function used by cfs_fs_time_before*() - */ -static inline int64_t __cfs_fs_time_flat(cfs_fs_time_t *t) -{ - return ((int64_t)t->tv_sec)*NSEC_PER_SEC + t->tv_usec*NSEC_PER_USEC; -} - -static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) < 0; -} - -static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) <= 0; -} - -static inline time_t cfs_duration_sec(cfs_duration_t d) -{ - return d / NSEC_PER_SEC; -} - -static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) -{ - s->tv_sec = d / NSEC_PER_SEC; - s->tv_usec = (d - ((int64_t)s->tv_sec) * NSEC_PER_SEC) / NSEC_PER_USEC; -} - -static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) -{ - s->tv_sec = d / NSEC_PER_SEC; - s->tv_nsec = d - ((int64_t)s->tv_sec) * NSEC_PER_SEC; -} - -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift -#define cfs_time_before_64 cfs_time_before - -/* - * One jiffy (in nanoseconds) - * - * osfmk/kern/sched_prim.c - * #define DEFAULT_PREEMPTION_RATE 100 - */ -#define CFS_TICK (NSEC_PER_SEC / (u_int64_t)100) - -#define LTIME_S(t) (t) - -/* __KERNEL__ */ -#else - -/* - * User level - */ -#include <libcfs/user-time.h> - -/* __KERNEL__ */ -#endif - -/* __LIBCFS_DARWIN_DARWIN_TIME_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/darwin/darwin-types.h b/lnet/include/libcfs/darwin/darwin-types.h deleted file mode 100644 index 
0fd2966c792bb970baee44977e7f1ef973ef4a80..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-types.h +++ /dev/null @@ -1,92 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_DARWIN_XNU_TYPES_H__ -#define __LIBCFS_DARWIN_XNU_TYPES_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#include <mach/mach_types.h> -#include <sys/types.h> - -#ifndef _BLKID_TYPES_H -#define _BLKID_TYPES_H -#endif - -typedef u_int8_t __u8; -typedef u_int16_t __u16; -typedef u_int32_t __u32; -typedef u_int64_t __u64; -typedef int8_t __s8; -typedef int16_t __s16; -typedef int32_t __s32; -typedef int64_t __s64; - -#ifdef __KERNEL__ - -#include <kern/kern_types.h> - - -typedef struct { int e; } event_chan_t; -typedef dev_t kdev_t; - -/* - * Atmoic define - */ -#include <libkern/OSAtomic.h> - -typedef struct { volatile uint32_t counter; } atomic_t; - -#define ATOMIC_INIT(i) { (i) } -#define atomic_read(a) ((a)->counter) -#define atomic_set(a, v) (((a)->counter) = (v)) -#ifdef __DARWIN8__ -#define atomic_add(v, a) OSAddAtomic(v, (SInt32 *)&((a)->counter)) -#define atomic_sub(v, a) OSAddAtomic(-(v), (SInt32 *)&((a)->counter)) -#define atomic_inc(a) OSIncrementAtomic((SInt32 *)&((a)->counter)) -#define atomic_dec(a) OSDecrementAtomic((SInt32 *)&((a)->counter)) -#else /* !__DARWIN8__ */ -#define atomic_add(v, a) hw_atomic_add((uint32_t *)&((a)->counter), v) -#define atomic_sub(v, a) hw_atomic_sub((uint32_t *)&((a)->counter), v) -#define atomic_inc(a) atomic_add(1, a) -#define atomic_dec(a) atomic_sub(1, a) -#endif /* !__DARWIN8__ */ -#define atomic_sub_and_test(v, a) ( atomic_sub(v, a) == -(a) ) -#define atomic_dec_and_test(a) ( atomic_dec(a) == 1 ) - -#include <libsa/mach/mach.h> -typedef off_t loff_t; - -#else /* !__KERNEL__ */ - -#include <stdint.h> - -typedef off_t loff_t; - -#endif /* __KERNEL END */ -typedef unsigned short umode_t; - -#endif /* __XNU_CFS_TYPES_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-utils.h b/lnet/include/libcfs/darwin/darwin-utils.h deleted file mode 100644 index 0f808a26bfa76c3694379214c84c2a0057c6c2d0..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/darwin-utils.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef __LIBCFS_DARWIN_UTILS_H__ -#define __LIBCFS_DARWIN_UTILS_H__ - 
-#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#include <sys/random.h> - -#ifdef __KERNEL__ -inline int isspace(char c); -char *strpbrk(const char *cs, const char *ct); -char * strsep(char **s, const char *ct); -size_t strnlen(const char * s, size_t count); -char * strstr(const char *in, const char *str); -char * strrchr(const char *p, int ch); -char * ul2dstr(unsigned long address, char *buf, int len); - -#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3) -#define simple_strtoul(a1, a2, a3) strtoul(a1, a2, a3) -#define simple_strtoll(a1, a2, a3) strtoq(a1, a2, a3) -#define simple_strtoull(a1, a2, a3) strtouq(a1, a2, a3) - -#define test_bit(i, a) isset(a, i) -#define set_bit(i, a) setbit(a, i) -#define clear_bit(i, a) clrbit(a, i) - -#define get_random_bytes(buf, len) read_random(buf, len) - -#endif /* __KERNEL__ */ - -#ifndef min_t -#define min_t(type,x,y) \ - ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) -#endif -#ifndef max_t -#define max_t(type,x,y) \ - ({ type __x = (x); type __y = (y); __x > __y ? 
__x: __y; }) -#endif - -#define do_div(n,base) \ - ({ \ - __u64 __n = (n); \ - __u32 __base = (base); \ - __u32 __mod; \ - \ - __mod = __n % __base; \ - n = __n / __base; \ - __mod; \ - }) - -#define NIPQUAD(addr) \ - ((unsigned char *)&addr)[0], \ - ((unsigned char *)&addr)[1], \ - ((unsigned char *)&addr)[2], \ - ((unsigned char *)&addr)[3] - -#define HIPQUAD NIPQUAD - -#ifndef LIST_CIRCLE -#define LIST_CIRCLE(elm, field) \ - do { \ - (elm)->field.le_prev = &(elm)->field.le_next; \ - } while (0) -#endif - -#endif /* __XNU_UTILS_H__ */ diff --git a/lnet/include/libcfs/darwin/kp30.h b/lnet/include/libcfs/darwin/kp30.h deleted file mode 100644 index f9e94b18cc431385d0ceac1de4fd066ea95e8774..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/kp30.h +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_DARWIN_KP30__ -#define __LIBCFS_DARWIN_KP30__ - -#ifndef __LIBCFS_KP30_H__ -#error Do not #include this file directly. 
#include <libcfs/kp30.h> instead -#endif - -#ifdef __KERNEL__ - -#include <sys/types.h> -#include <sys/malloc.h> -#include <sys/systm.h> -#include <mach/mach_types.h> -#include <string.h> -#include <sys/file.h> -#include <sys/conf.h> -#include <miscfs/devfs/devfs.h> -#include <stdarg.h> - -#include <libcfs/darwin/darwin-lock.h> -#include <libcfs/darwin/darwin-prim.h> -#include <lnet/lnet.h> - -#define our_cond_resched() cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, 1) - -#ifdef CONFIG_SMP -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */ -#else -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) -#endif -#define LASSERT_SEM_LOCKED(sem) do {} while(0) /* XXX */ - -#define LIBCFS_PANIC(msg) panic(msg) -#error libcfs_register_panic_notifier() missing -#error libcfs_unregister_panic_notifier() missing - -/* --------------------------------------------------------------------- */ - -#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) ((typeof(&x))cfs_symbol_get(#x)) -#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x) - -#define PORTAL_MODULE_USE do{int i = 0; i++;}while(0) -#define PORTAL_MODULE_UNUSE do{int i = 0; i--;}while(0) - -#define num_online_cpus() cfs_online_cpus() - -/******************************************************************************/ -/* XXX Liang: There is no module parameter supporting in OSX */ -#define CFS_MODULE_PARM(name, t, type, perm, desc) - -#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */ -/******************************************************************************/ - -#else /* !__KERNEL__ */ -# include <stdio.h> -# include <stdlib.h> -# include <stdint.h> -# include <unistd.h> -# include <time.h> -# include <machine/limits.h> -# include <sys/types.h> -#endif - -#define BITS_PER_LONG LONG_BIT -/******************************************************************************/ -/* Light-weight trace - * 
Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 0 - -typedef struct { - long long lwte_when; - char *lwte_where; - void *lwte_task; - long lwte_p1; - long lwte_p2; - long lwte_p3; - long lwte_p4; -} lwt_event_t; - -# define LWT_EVENT(p1,p2,p3,p4) /* no lwt implementation yet */ - -/* -------------------------------------------------------------------------- */ - -#define IOCTL_LIBCFS_TYPE struct libcfs_ioctl_data - -#define LPU64 "%llu" -#define LPD64 "%lld" -#define LPX64 "%#llx" -#define LPSZ "%lu" -#define LPSSZ "%ld" -# define LI_POISON ((int)0x5a5a5a5a) -# define LL_POISON ((long)0x5a5a5a5a) -# define LP_POISON ((void *)(long)0x5a5a5a5a) - -#endif diff --git a/lnet/include/libcfs/darwin/libcfs.h b/lnet/include/libcfs/darwin/libcfs.h deleted file mode 100644 index eb4d8f35982bb076c246eca81fb6ad0c5cd5de8f..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/libcfs.h +++ /dev/null @@ -1,193 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_DARWIN_LIBCFS_H__ -#define __LIBCFS_DARWIN_LIBCFS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#include <mach/mach_types.h> -#include <sys/errno.h> -#include <string.h> -#include <libcfs/darwin/darwin-types.h> -#include <libcfs/darwin/darwin-time.h> -#include <libcfs/darwin/darwin-prim.h> -#include <libcfs/darwin/darwin-mem.h> -#include <libcfs/darwin/darwin-lock.h> -#include <libcfs/darwin/darwin-fs.h> -#include <libcfs/darwin/darwin-tcpip.h> - -#ifdef __KERNEL__ -# include <sys/types.h> -# include <sys/time.h> -# define do_gettimeofday(tv) microuptime(tv) -#else -# include <sys/time.h> -# define do_gettimeofday(tv) gettimeofday(tv, NULL); -typedef unsigned long long cycles_t; -#endif - -#define __cpu_to_le64(x) OSSwapHostToLittleInt64(x) -#define __cpu_to_le32(x) OSSwapHostToLittleInt32(x) -#define __cpu_to_le16(x) OSSwapHostToLittleInt16(x) - -#define __le16_to_cpu(x) OSSwapLittleToHostInt16(x) -#define __le32_to_cpu(x) OSSwapLittleToHostInt32(x) -#define __le64_to_cpu(x) OSSwapLittleToHostInt64(x) - -#define cpu_to_le64(x) __cpu_to_le64(x) -#define cpu_to_le32(x) __cpu_to_le32(x) -#define cpu_to_le16(x) __cpu_to_le16(x) - -#define le64_to_cpu(x) __le64_to_cpu(x) -#define le32_to_cpu(x) __le32_to_cpu(x) -#define le16_to_cpu(x) __le16_to_cpu(x) - -#define __swab16(x) OSSwapInt16(x) -#define __swab32(x) OSSwapInt32(x) -#define __swab64(x) OSSwapInt64(x) -#define __swab16s(x) do { *(x) = __swab16(*(x)); } while (0) -#define __swab32s(x) do { *(x) = __swab32(*(x)); } while (0) -#define __swab64s(x) do { *(x) = __swab64(*(x)); } while (0) - -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u32 ph_cpu_id; - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); - - -#ifdef __KERNEL__ -# include <sys/systm.h> -# include <pexpert/pexpert.h> -/* Fix me */ -# define THREAD_SIZE 8192 -#else -# define THREAD_SIZE 8192 -#endif -#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) - -#define 
CHECK_STACK() do { } while(0) -#define CDEBUG_STACK() (0L) - -/* Darwin has defined RETURN, so we have to undef it in lustre */ -#ifdef RETURN -#undef RETURN -#endif - -/* - * When this is enabled debugging messages are indented according to the - * current "nesting level". Nesting level in increased when ENTRY macro - * is executed, and decreased on EXIT and RETURN. - */ -#ifdef __KERNEL__ -#define ENTRY_NESTING_SUPPORT (0) -#endif - -#if ENTRY_NESTING_SUPPORT - -/* - * Currently ENTRY_NESTING_SUPPORT is only supported for XNU port. Basic - * idea is to keep per-thread pointer to small data structure (struct - * cfs_debug_data) describing current nesting level. In XNU unused - * proc->p_wmegs field in hijacked for this. On Linux - * current->journal_info can be used. In user space - * pthread_{g,s}etspecific(). - * - * ENTRY macro allocates new cfs_debug_data on stack, and installs it as - * a current nesting level, storing old data in cfs_debug_data it just - * created. - * - * EXIT pops old value back. - * - */ - -/* - * One problem with this approach is that there is a lot of code that - * does ENTRY and then escapes scope without doing EXIT/RETURN. In this - * case per-thread current nesting level pointer is dangling (it points - * to the stack area that is possible already overridden). To detect - * such cases, we add two magic fields to the cfs_debug_data and check - * them whenever current nesting level pointer is dereferenced. While - * looking flaky this works because stack is always consumed - * "continously". 
- */ -enum { - CDD_MAGIC1 = 0x02128506, - CDD_MAGIC2 = 0x42424242 -}; - -struct cfs_debug_data { - unsigned int magic1; - struct cfs_debug_data *parent; - int nesting_level; - unsigned int magic2; -}; - -void __entry_nesting(struct cfs_debug_data *child); -void __exit_nesting(struct cfs_debug_data *child); -unsigned int __current_nesting_level(void); - -#define ENTRY_NESTING \ -struct cfs_debug_data __cdd = { .magic1 = CDD_MAGIC1, \ - .parent = NULL, \ - .nesting_level = 0, \ - .magic2 = CDD_MAGIC2 }; \ -__entry_nesting(&__cdd); - -#define EXIT_NESTING __exit_nesting(&__cdd) - -/* ENTRY_NESTING_SUPPORT */ -#else - -#define ENTRY_NESTING do {;} while (0) -#define EXIT_NESTING do {;} while (0) -#define __current_nesting_level() (0) - -/* ENTRY_NESTING_SUPPORT */ -#endif - -#define LUSTRE_LNET_PID 12345 - -#define _XNU_LIBCFS_H - -/* - * Platform specific declarations for cfs_curproc API (libcfs/curproc.h) - * - * Implementation is in darwin-curproc.c - */ -#define CFS_CURPROC_COMM_MAX MAXCOMLEN -/* - * XNU has no capabilities - */ -typedef int cfs_kernel_cap_t; - -#ifdef __KERNEL__ -enum { - /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */ - CFS_STACK_TRACE_DEPTH = 16 -}; - -struct cfs_stack_trace { - void *frame[CFS_STACK_TRACE_DEPTH]; -}; - -#define printk(format, args...) printf(format, ## args) - -#ifdef WITH_WATCHDOG -#undef WITH_WATCHDOG -#endif - -#endif /* __KERNEL__ */ - -#endif /* _XNU_LIBCFS_H */ diff --git a/lnet/include/libcfs/darwin/lltrace.h b/lnet/include/libcfs/darwin/lltrace.h deleted file mode 100644 index 31d6e17f2812a3a4435b0688d451cffca5e65c01..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/lltrace.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_DARWIN_LLTRACE_H__ -#define __LIBCFS_DARWIN_LLTRACE_H__ - -#ifndef __LIBCFS_LLTRACE_H__ -#error Do not #include this file directly. 
#include <libcfs/lltrace.h> instead -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/time.h> -#include <lnet/types.h> -#include <libcfs/kp30.h> -#include <mach/vm_param.h> -#include <lnet/lnetctl.h> - -#endif diff --git a/lnet/include/libcfs/darwin/portals_utils.h b/lnet/include/libcfs/darwin/portals_utils.h deleted file mode 100644 index 4907cb15e2576159cb975c39edbc031bf625728d..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/darwin/portals_utils.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __LIBCFS_DARWIN_PORTALS_UTILS_H__ -#define __LIBCFS_DARWIN_PORTALS_UTILS_H__ - -#ifndef __LIBCFS_PORTALS_UTILS_H__ -#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead -#endif - -#include <libcfs/list.h> -#ifdef __KERNEL__ -#include <mach/mach_types.h> -#include <libcfs/libcfs.h> -#else /* !__KERNEL__ */ -#include <machine/endian.h> -#include <netinet/in.h> -#include <sys/syscall.h> -#endif /* !__KERNEL__ */ - -#endif diff --git a/lnet/include/libcfs/kp30.h b/lnet/include/libcfs/kp30.h deleted file mode 100644 index 7f4426a26340112541103a57d2250a079c1e2b87..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/kp30.h +++ /dev/null @@ -1,606 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_KP30_H__ -#define __LIBCFS_KP30_H__ - -/* Controlled via configure key */ -/* #define LIBCFS_DEBUG */ - -#include <libcfs/libcfs.h> -#include <lnet/types.h> - -#if defined(__linux__) -#include <libcfs/linux/kp30.h> -#elif defined(__APPLE__) -#include <libcfs/darwin/kp30.h> -#elif defined(__WINNT__) -#include <libcfs/winnt/kp30.h> -#else -#error Unsupported operating system -#endif - -#ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -#endif - -#ifdef __KERNEL__ - -#ifdef 
LIBCFS_DEBUG - -/* - * When this is on, LASSERT macro includes check for assignment used instead - * of equality check, but doesn't have unlikely(). Turn this on from time to - * time to make test-builds. This shouldn't be on for production release. - */ -#define LASSERT_CHECKED (0) - -#if LASSERT_CHECKED -/* - * Assertion. - * - * Strange construction with empty "then" clause is used to trigger compiler - * warnings on the assertions of the form LASSERT(a = b); - * - * "warning: suggest parentheses around assignment used as truth value" - * - * requires -Wall. Unfortunately this rules out use of likely/unlikely. - */ -#define LASSERT(cond) \ -({ \ - if (cond) \ - ; \ - else \ - libcfs_assertion_failed( #cond , __FILE__, \ - __FUNCTION__, __LINE__); \ -}) - -#define LASSERTF(cond, fmt, a...) \ -({ \ - if (cond) \ - ; \ - else { \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ - __FILE__, __FUNCTION__,__LINE__, \ - "ASSERTION(" #cond ") failed:" fmt, \ - ## a); \ - LBUG(); \ - } \ -}) - -/* LASSERT_CHECKED */ -#else - -#define LASSERT(cond) \ -({ \ - if (unlikely(!(cond))) \ - libcfs_assertion_failed(#cond , __FILE__, \ - __FUNCTION__, __LINE__); \ -}) - -#define LASSERTF(cond, fmt, a...) \ -({ \ - if (unlikely(!(cond))) { \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ - __FILE__, __FUNCTION__,__LINE__, \ - "ASSERTION(" #cond ") failed:" fmt, \ - ## a); \ - LBUG(); \ - } \ -}) - -/* LASSERT_CHECKED */ -#endif - -/* LIBCFS_DEBUG */ -#else -#define LASSERT(e) ((void)(0)) -#define LASSERTF(cond, fmt...) 
((void)(0)) -#endif /* LIBCFS_DEBUG */ - -void lbug_with_loc(char *file, const char *func, const int line) - __attribute__((noreturn)); - -#define LBUG() lbug_with_loc(__FILE__, __FUNCTION__, __LINE__) - -extern atomic_t libcfs_kmemory; -/* - * Memory - */ -#ifdef LIBCFS_DEBUG - -# define libcfs_kmem_inc(ptr, size) \ -do { \ - atomic_add(size, &libcfs_kmemory); \ -} while (0) - -# define libcfs_kmem_dec(ptr, size) do { \ - atomic_sub(size, &libcfs_kmemory); \ -} while (0) - -#else -# define libcfs_kmem_inc(ptr, size) do {} while (0) -# define libcfs_kmem_dec(ptr, size) do {} while (0) -#endif /* LIBCFS_DEBUG */ - -#define LIBCFS_VMALLOC_SIZE 16384 - -#define LIBCFS_ALLOC_GFP(ptr, size, mask) \ -do { \ - LASSERT(!in_interrupt() || \ - (size <= LIBCFS_VMALLOC_SIZE && mask == CFS_ALLOC_ATOMIC));\ - if (unlikely((size) > LIBCFS_VMALLOC_SIZE)) \ - (ptr) = cfs_alloc_large(size); \ - else \ - (ptr) = cfs_alloc((size), (mask)); \ - if (unlikely((ptr) == NULL)) { \ - CERROR("LNET: out of memory at %s:%d (tried to alloc '" \ - #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ - CERROR("LNET: %d total bytes allocated by lnet\n", \ - atomic_read(&libcfs_kmemory)); \ - } else { \ - libcfs_kmem_inc((ptr), (size)); \ - if (!((mask) & CFS_ALLOC_ZERO)) \ - memset((ptr), 0, (size)); \ - } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), (ptr), atomic_read (&libcfs_kmemory)); \ -} while (0) - -#define LIBCFS_ALLOC(ptr, size) \ - LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_IO) - -#define LIBCFS_ALLOC_ATOMIC(ptr, size) \ - LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_ATOMIC) - -#define LIBCFS_FREE(ptr, size) \ -do { \ - int s = (size); \ - if (unlikely((ptr) == NULL)) { \ - CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ - "%s:%d\n", s, __FILE__, __LINE__); \ - break; \ - } \ - if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \ - cfs_free_large(ptr); \ - else \ - cfs_free(ptr); \ - libcfs_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot 
%d).\n", \ - s, (ptr), atomic_read(&libcfs_kmemory)); \ -} while (0) - -/******************************************************************************/ - -/* htonl hack - either this, or compile with -O2. Stupid byteorder/generic.h */ -#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) -#define ___htonl(x) __cpu_to_be32(x) -#define ___htons(x) __cpu_to_be16(x) -#define ___ntohl(x) __be32_to_cpu(x) -#define ___ntohs(x) __be16_to_cpu(x) -#define htonl(x) ___htonl(x) -#define ntohl(x) ___ntohl(x) -#define htons(x) ___htons(x) -#define ntohs(x) ___ntohs(x) -#endif - -void libcfs_debug_dumpstack(cfs_task_t *tsk); -void libcfs_run_upcall(char **argv); -void libcfs_run_lbug_upcall(char * file, const char *fn, const int line); -void libcfs_debug_dumplog(void); -int libcfs_debug_init(unsigned long bufsize); -int libcfs_debug_cleanup(void); -int libcfs_debug_clear_buffer(void); -int libcfs_debug_mark_buffer(char *text); - -void libcfs_debug_set_level(unsigned int debug_level); - -#else /* !__KERNEL__ */ -# ifdef LIBCFS_DEBUG -# undef NDEBUG -# include <assert.h> -# define LASSERT(e) assert(e) -# define LASSERTF(cond, args...) \ -do { \ - if (!(cond)) \ - CERROR(args); \ - assert(cond); \ -} while (0) -# define LBUG() assert(0) -# else -# define LASSERT(e) ((void)(0)) -# define LASSERTF(cond, args...) do { } while (0) -# define LBUG() ((void)(0)) -# endif /* LIBCFS_DEBUG */ -# define printk(format, args...) printf (format, ## args) -# ifdef CRAY_XT3 /* buggy calloc! */ -# define LIBCFS_ALLOC(ptr, size) \ - do { \ - (ptr) = malloc(size); \ - memset(ptr, 0, size); \ - } while (0); -# else -# define LIBCFS_ALLOC(ptr, size) do { (ptr) = calloc(1,size); } while (0); -# endif -# define LIBCFS_FREE(a, b) do { free(a); } while (0); - -void libcfs_debug_dumplog(void); -int libcfs_debug_init(unsigned long bufsize); -int libcfs_debug_cleanup(void); - -/* - * Generic compiler-dependent macros required for kernel - * build go below this comment. 
Actual compiler/compiler version - * specific implementations come from the above header files - */ - -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) - -/* !__KERNEL__ */ -#endif - -/* - * compile-time assertions. @cond has to be constant expression. - * ISO C Standard: - * - * 6.8.4.2 The switch statement - * - * .... - * - * [#3] The expression of each case label shall be an integer - * constant expression and no two of the case constant - * expressions in the same switch statement shall have the same - * value after conversion... - * - */ -#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } }) - -/* support decl needed both by kernel and liblustre */ -int libcfs_isknown_lnd(int type); -char *libcfs_lnd2modname(int type); -char *libcfs_lnd2str(int type); -int libcfs_str2lnd(char *str); -char *libcfs_net2str(__u32 net); -char *libcfs_nid2str(lnet_nid_t nid); -__u32 libcfs_str2net(char *str); -lnet_nid_t libcfs_str2nid(char *str); -int libcfs_str2anynid(lnet_nid_t *nid, char *str); -char *libcfs_id2str(lnet_process_id_t id); -void libcfs_setnet0alias(int type); - -/* how an LNET NID encodes net:address */ -#define LNET_NIDADDR(nid) ((__u32)((nid) & 0xffffffff)) -#define LNET_NIDNET(nid) ((__u32)(((nid) >> 32)) & 0xffffffff) -#define LNET_MKNID(net,addr) ((((__u64)(net))<<32)|((__u64)(addr))) -/* how net encodes type:number */ -#define LNET_NETNUM(net) ((net) & 0xffff) -#define LNET_NETTYP(net) (((net) >> 16) & 0xffff) -#define LNET_MKNET(typ,num) ((((__u32)(typ))<<16)|((__u32)(num))) - -/* implication */ -#define ergo(a, b) (!(a) || (b)) -/* logical equivalence */ -#define equi(a, b) (!!(a) == !!(b)) - -#ifndef CURRENT_TIME -# define CURRENT_TIME time(0) -#endif - -/* -------------------------------------------------------------------- - * Light-weight trace - * Support for temporary event tracing with minimal Heisenberg effect. 
- * All stuff about lwt are put in arch/kp30.h - * -------------------------------------------------------------------- */ - -struct libcfs_device_userstate -{ - int ldu_memhog_pages; - cfs_page_t *ldu_memhog_root_page; -}; - -/* what used to be in portals_lib.h */ -#ifndef MIN -# define MIN(a,b) (((a)<(b)) ? (a): (b)) -#endif -#ifndef MAX -# define MAX(a,b) (((a)>(b)) ? (a): (b)) -#endif - -#define MKSTR(ptr) ((ptr))? (ptr) : "" - -static inline int size_round4 (int val) -{ - return (val + 3) & (~0x3); -} - -static inline int size_round (int val) -{ - return (val + 7) & (~0x7); -} - -static inline int size_round16(int val) -{ - return (val + 0xf) & (~0xf); -} - -static inline int size_round32(int val) -{ - return (val + 0x1f) & (~0x1f); -} - -static inline int size_round0(int val) -{ - if (!val) - return 0; - return (val + 1 + 7) & (~0x7); -} - -static inline size_t round_strlen(char *fset) -{ - return (size_t)size_round((int)strlen(fset) + 1); -} - -#define LOGL(var,len,ptr) \ -do { \ - if (var) \ - memcpy((char *)ptr, (const char *)var, len); \ - ptr += size_round(len); \ -} while (0) - -#define LOGU(var,len,ptr) \ -do { \ - if (var) \ - memcpy((char *)var, (const char *)ptr, len); \ - ptr += size_round(len); \ -} while (0) - -#define LOGL0(var,len,ptr) \ -do { \ - if (!len) \ - break; \ - memcpy((char *)ptr, (const char *)var, len); \ - *((char *)(ptr) + len) = 0; \ - ptr += size_round(len + 1); \ -} while (0) - -/* - * USER LEVEL STUFF BELOW - */ - -#define LIBCFS_IOCTL_VERSION 0x0001000a - -struct libcfs_ioctl_data { - __u32 ioc_len; - __u32 ioc_version; - - __u64 ioc_nid; - __u64 ioc_u64[1]; - - __u32 ioc_flags; - __u32 ioc_count; - __u32 ioc_net; - __u32 ioc_u32[7]; - - __u32 ioc_inllen1; - char *ioc_inlbuf1; - __u32 ioc_inllen2; - char *ioc_inlbuf2; - - __u32 ioc_plen1; /* buffers in userspace */ - char *ioc_pbuf1; - __u32 ioc_plen2; /* buffers in userspace */ - char *ioc_pbuf2; - - char ioc_bulk[0]; -}; - - -struct libcfs_ioctl_hdr { - __u32 ioc_len; - 
__u32 ioc_version; -}; - -struct libcfs_debug_ioctl_data -{ - struct libcfs_ioctl_hdr hdr; - unsigned int subs; - unsigned int debug; -}; - -#define LIBCFS_IOC_INIT(data) \ -do { \ - memset(&data, 0, sizeof(data)); \ - data.ioc_version = LIBCFS_IOCTL_VERSION; \ - data.ioc_len = sizeof(data); \ -} while (0) - -/* FIXME check conflict with lustre_lib.h */ -#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long) - -static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data) -{ - int len = sizeof(*data); - len += size_round(data->ioc_inllen1); - len += size_round(data->ioc_inllen2); - return len; -} - -static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) -{ - if (data->ioc_len > (1<<30)) { - CERROR ("LIBCFS ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen1 > (1<<30)) { - CERROR ("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen2 > (1<<30)) { - CERROR ("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - CERROR ("LIBCFS ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR ("LIBCFS ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR ("LIBCFS ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR ("LIBCFS ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR ("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n"); - return 1; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR ("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); - return 1; - } - if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len ) { - CERROR ("LIBCFS ioctl: packlen != ioc_len\n"); - return 1; - } - if (data->ioc_inllen1 && - data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { - CERROR ("LIBCFS ioctl: inlbuf1 not 0 
terminated\n"); - return 1; - } - if (data->ioc_inllen2 && - data->ioc_bulk[size_round(data->ioc_inllen1) + - data->ioc_inllen2 - 1] != '\0') { - CERROR ("LIBCFS ioctl: inlbuf2 not 0 terminated\n"); - return 1; - } - return 0; -} - -#ifndef __KERNEL__ -static inline int libcfs_ioctl_pack(struct libcfs_ioctl_data *data, char **pbuf, - int max) -{ - char *ptr; - struct libcfs_ioctl_data *overlay; - data->ioc_len = libcfs_ioctl_packlen(data); - data->ioc_version = LIBCFS_IOCTL_VERSION; - - if (*pbuf && libcfs_ioctl_packlen(data) > max) - return 1; - if (*pbuf == NULL) { - *pbuf = malloc(data->ioc_len); - } - if (!*pbuf) - return 1; - overlay = (struct libcfs_ioctl_data *)*pbuf; - memcpy(*pbuf, data, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (libcfs_ioctl_is_invalid(overlay)) - return 1; - - return 0; -} - -#else - -extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg); -extern int libcfs_ioctl_popdata(void *arg, void *buf, int size); - -#endif - -/* ioctls for manipulating snapshots 30- */ -#define IOC_LIBCFS_TYPE 'e' -#define IOC_LIBCFS_MIN_NR 30 -/* libcfs ioctls */ -#define IOC_LIBCFS_PANIC _IOWR('e', 30, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LWT_CONTROL _IOWR('e', 33, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LWT_SNAPSHOT _IOWR('e', 34, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LWT_LOOKUP_STRING _IOWR('e', 35, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, IOCTL_LIBCFS_TYPE) -/* lnet ioctls */ -#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_ADD_ROUTE _IOWR('e', 52, IOCTL_LIBCFS_TYPE) -#define 
IOC_LIBCFS_DEL_ROUTE _IOWR('e', 53, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_ROUTE _IOWR('e', 54, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE) -/* lnd ioctls */ -#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_TXDESC _IOWR('e', 77, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE) -#define IOC_LIBCFS_GET_GMID _IOWR('e', 81, IOCTL_LIBCFS_TYPE) - -#define IOC_LIBCFS_MAX_NR 81 - - -enum { - /* Only add to these values (i.e. don't ever change or redefine them): - * network addresses depend on them... 
*/ - QSWLND = 1, - SOCKLND = 2, - GMLND = 3, - PTLLND = 4, - O2IBLND = 5, - CIBLND = 6, - OPENIBLND = 7, - IIBLND = 8, - LOLND = 9, - RALND = 10, - VIBLND = 11, - MXLND = 12, -}; - -enum { - DEBUG_DAEMON_START = 1, - DEBUG_DAEMON_STOP = 2, - DEBUG_DAEMON_PAUSE = 3, - DEBUG_DAEMON_CONTINUE = 4, -}; - - -enum cfg_record_type { - PORTALS_CFG_TYPE = 1, - LUSTRE_CFG_TYPE = 123, -}; - -typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); - -/* lustre_id output helper macros */ -#define DLID4 "%lu/%lu/%lu/%lu" - -#define OLID4(id) \ - (unsigned long)(id)->li_fid.lf_id, \ - (unsigned long)(id)->li_fid.lf_group, \ - (unsigned long)(id)->li_stc.u.e3s.l3s_ino, \ - (unsigned long)(id)->li_stc.u.e3s.l3s_gen - -#endif diff --git a/lnet/include/libcfs/libcfs.h b/lnet/include/libcfs/libcfs.h deleted file mode 100644 index 341d3caa534ede90d6d68a267de641a3583c9d38..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/libcfs.h +++ /dev/null @@ -1,669 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LIBCFS_H__ -#define __LIBCFS_LIBCFS_H__ - -#if !__GNUC__ -#define __attribute__(x) -#endif - -#if defined(__linux__) -#include <libcfs/linux/libcfs.h> -#elif defined(__APPLE__) -#include <libcfs/darwin/libcfs.h> -#elif defined(__WINNT__) -#include <libcfs/winnt/libcfs.h> -#else -#error Unsupported operating system. -#endif - -#include "curproc.h" - -#ifndef __KERNEL__ -#include <stdio.h> -#endif - -/* Controlled via configure key */ -/* #define LIBCFS_DEBUG */ - -#ifndef offsetof -# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) -#endif - -/* cardinality of array */ -#define sizeof_array(a) ((sizeof (a)) / (sizeof ((a)[0]))) - -#if !defined(container_of) -/* given a pointer @ptr to the field @member embedded into type (usually - * struct) @type, return pointer to the embedding instance of @type. 
*/ -#define container_of(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) -#endif - -#define container_of0(ptr, type, member) \ -({ \ - typeof(ptr) __ptr = (ptr); \ - __ptr ? container_of(__ptr, type, member) : NULL; \ -}) - -/* - * true iff @i is power-of-2 - */ -#define IS_PO2(i) \ -({ \ - typeof(i) __i; \ - \ - __i = (i); \ - !(__i & (__i - 1)); \ -}) - -#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) - -/* - * Debugging - */ -extern unsigned int libcfs_subsystem_debug; -extern unsigned int libcfs_stack; -extern unsigned int libcfs_debug; -extern unsigned int libcfs_printk; -extern unsigned int libcfs_console_ratelimit; -extern unsigned int libcfs_debug_binary; -extern char debug_file_path[1024]; - -int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys); -int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys); - -/* Has there been an LBUG? */ -extern unsigned int libcfs_catastrophe; - -/* - * struct ptldebug_header is defined in libcfs/<os>/libcfs.h - */ - -#define PH_FLAG_FIRST_RECORD 1 - -/* Debugging subsystems (32 bits, non-overlapping) */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ -#define S_UNDEFINED 0x00000001 -#define S_MDC 0x00000002 -#define S_MDS 0x00000004 -#define S_OSC 0x00000008 -#define S_OST 0x00000010 -#define S_CLASS 0x00000020 -#define S_LOG 0x00000040 -#define S_LLITE 0x00000080 -#define S_RPC 0x00000100 -#define S_MGMT 0x00000200 -#define S_LNET 0x00000400 -#define S_LND 0x00000800 /* ALL LNDs */ -#define S_PINGER 0x00001000 -#define S_FILTER 0x00002000 -/* unused */ -#define S_ECHO 0x00008000 -#define S_LDLM 0x00010000 -#define S_LOV 0x00020000 -/* unused */ -/* unused */ -/* unused */ -/* unused */ -/* unused */ -#define S_LMV 0x00800000 /* b_new_cmd */ -/* unused */ -#define S_SEC 0x02000000 /* upcall cache */ -#define S_GSS 0x04000000 /* b_new_cmd */ -/* unused */ -#define S_MGC 0x10000000 -#define S_MGS 0x20000000 -#define S_FID 0x40000000 
/* b_new_cmd */ -#define S_FLD 0x80000000 /* b_new_cmd */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ - -/* Debugging masks (32 bits, non-overlapping) */ -/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ -#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ -#define D_INODE 0x00000002 -#define D_SUPER 0x00000004 -#define D_EXT2 0x00000008 /* anything from ext2_debug */ -#define D_MALLOC 0x00000010 /* print malloc, free information */ -#define D_CACHE 0x00000020 /* cache-related items */ -#define D_INFO 0x00000040 /* general information */ -#define D_IOCTL 0x00000080 /* ioctl related information */ -#define D_NETERROR 0x00000100 /* network errors */ -#define D_NET 0x00000200 /* network communications */ -#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */ -#define D_BUFFS 0x00000800 -#define D_OTHER 0x00001000 -#define D_DENTRY 0x00002000 -#define D_NETTRACE 0x00004000 -#define D_PAGE 0x00008000 /* bulk page handling */ -#define D_DLMTRACE 0x00010000 -#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */ -#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) 
*/ -#define D_HA 0x00080000 /* recovery and failover */ -#define D_RPCTRACE 0x00100000 /* for distributed debugging */ -#define D_VFSTRACE 0x00200000 -#define D_READA 0x00400000 /* read-ahead */ -#define D_MMAP 0x00800000 -#define D_CONFIG 0x01000000 -#define D_CONSOLE 0x02000000 -#define D_QUOTA 0x04000000 -#define D_SEC 0x08000000 -/* keep these in sync with lnet/{utils,libcfs}/debug.c */ - -#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) - -#ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -#endif - -#define CDEBUG_MAX_LIMIT 600 -typedef struct { - cfs_time_t cdls_next; - int cdls_count; - cfs_duration_t cdls_delay; -} cfs_debug_limit_state_t; - -/* Controlled via configure key */ -/* #define CDEBUG_ENABLED (1) */ - -#ifdef __KERNEL__ - -#ifdef CDEBUG_ENABLED -#define __CDEBUG(cdls, mask, format, a...) \ -do { \ - CHECK_STACK(); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, mask, \ - __FILE__, __FUNCTION__, __LINE__, \ - format, ## a); \ -} while (0) - -#define CDEBUG(mask, format, a...) __CDEBUG(NULL, mask, format, ## a) - -#define CDEBUG_LIMIT(mask, format, a...) \ -do { \ - static cfs_debug_limit_state_t cdls; \ - \ - __CDEBUG(&cdls, mask, format, ## a); \ -} while (0) - -#else /* CDEBUG_ENABLED */ -#define CDEBUG(mask, format, a...) (void)(0) -#define CDEBUG_LIMIT(mask, format, a...) (void)(0) -#warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!" -#endif - -#elif defined(__arch_lib__) && !defined(LUSTRE_UTILS) - -#define CDEBUG(mask, format, a...) \ -do { \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, mask, \ - __FILE__, __FUNCTION__, __LINE__, \ - format, ## a); \ -} while (0) - -#define CDEBUG_LIMIT CDEBUG - -#else - -#define CDEBUG(mask, format, a...) 
\ -do { \ - if (((mask) & D_CANTMASK) != 0) \ - fprintf(stderr, "(%s:%d:%s()) " format, \ - __FILE__, __LINE__, __FUNCTION__, ## a); \ -} while (0) - -#define CDEBUG_LIMIT CDEBUG - -#endif /* !__KERNEL__ */ - -#define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a) -#define CERROR(format, a...) CDEBUG_LIMIT(D_ERROR, format, ## a) -#define CEMERG(format, a...) CDEBUG_LIMIT(D_EMERG, format, ## a) - -#define LCONSOLE(mask, format, a...) CDEBUG(D_CONSOLE | (mask), format, ## a) -#define LCONSOLE_INFO(format, a...) CDEBUG_LIMIT(D_CONSOLE, format, ## a) -#define LCONSOLE_WARN(format, a...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## a) -#define LCONSOLE_ERROR(format, a...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, format, ## a) -#define LCONSOLE_EMERG(format, a...) CDEBUG(D_CONSOLE | D_EMERG, format, ## a) - -#ifdef CDEBUG_ENABLED - -#define GOTO(label, rc) \ -do { \ - long GOTO__ret = (long)(rc); \ - CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \ - #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\ - (signed long)GOTO__ret); \ - goto label; \ -} while (0) -#else -#define GOTO(label, rc) do { ((void)(rc)); goto label; } while (0) -#endif - -/* Controlled via configure key */ -/* #define CDEBUG_ENTRY_EXIT (1) */ - -#ifdef CDEBUG_ENTRY_EXIT - -/* - * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise - * there will be a warning in osx. 
- */ -#define RETURN(rc) \ -do { \ - typeof(rc) RETURN__ret = (rc); \ - CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ - (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\ - EXIT_NESTING; \ - return RETURN__ret; \ -} while (0) - -#define ENTRY \ -ENTRY_NESTING; \ -do { \ - CDEBUG(D_TRACE, "Process entered\n"); \ -} while (0) - -#define EXIT \ -do { \ - CDEBUG(D_TRACE, "Process leaving\n"); \ - EXIT_NESTING; \ -} while(0) -#else /* !CDEBUG_ENTRY_EXIT */ - -#define RETURN(rc) return (rc) -#define ENTRY do { } while (0) -#define EXIT do { } while (0) - -#endif /* !CDEBUG_ENTRY_EXIT */ - -/* - * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses - * Lustre RETURN(NULL) macro. - */ -#if defined(NULL) -#undef NULL -#endif - -#define NULL ((void *)0) - -#define LUSTRE_SRV_LNET_PID LUSTRE_LNET_PID - -#ifdef __KERNEL__ - -#include <libcfs/list.h> - -struct libcfs_ioctl_data; /* forward ref */ - -struct libcfs_ioctl_handler { - struct list_head item; - int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data); -}; - -#define DECLARE_IOCTL_HANDLER(ident, func) \ - struct libcfs_ioctl_handler ident = { \ - /* .item = */ CFS_LIST_HEAD_INIT(ident.item), \ - /* .handle_ioctl = */ func \ - } - -int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); -int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); - -/* libcfs tcpip */ -#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512 -#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023 - -int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask); -int libcfs_ipif_enumerate(char ***names); -void libcfs_ipif_free_enumeration(char **names, int n); -int libcfs_sock_listen(cfs_socket_t **sockp, __u32 ip, int port, int backlog); -int libcfs_sock_accept(cfs_socket_t **newsockp, cfs_socket_t *sock); -void libcfs_sock_abort_accept(cfs_socket_t *sock); -int libcfs_sock_connect(cfs_socket_t **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port); -int 
libcfs_sock_setbuf(cfs_socket_t *socket, int txbufsize, int rxbufsize); -int libcfs_sock_getbuf(cfs_socket_t *socket, int *txbufsize, int *rxbufsize); -int libcfs_sock_getaddr(cfs_socket_t *socket, int remote, __u32 *ip, int *port); -int libcfs_sock_write(cfs_socket_t *sock, void *buffer, int nob, int timeout); -int libcfs_sock_read(cfs_socket_t *sock, void *buffer, int nob, int timeout); -void libcfs_sock_release(cfs_socket_t *sock); - -/* libcfs watchdogs */ -struct lc_watchdog; - -/* Add a watchdog which fires after "time" milliseconds of delay. You have to - * touch it once to enable it. */ -struct lc_watchdog *lc_watchdog_add(int time, - void (*cb)(pid_t pid, void *), - void *data); - -/* Enables a watchdog and resets its timer. */ -void lc_watchdog_touch(struct lc_watchdog *lcw); - -/* Disable a watchdog; touch it to restart it. */ -void lc_watchdog_disable(struct lc_watchdog *lcw); - -/* Clean up the watchdog */ -void lc_watchdog_delete(struct lc_watchdog *lcw); - -/* Dump a debug log */ -void lc_watchdog_dumplog(pid_t pid, void *data); - -/* __KERNEL__ */ -#endif - -/* - * libcfs pseudo device operations - * - * struct cfs_psdev_t and - * cfs_psdev_register() and - * cfs_psdev_deregister() are declared in - * libcfs/<os>/cfs_prim.h - * - * It's just draft now. - */ - -struct cfs_psdev_file { - unsigned long off; - void *private_data; - unsigned long reserved1; - unsigned long reserved2; -}; - -struct cfs_psdev_ops { - int (*p_open)(unsigned long, void *); - int (*p_close)(unsigned long, void *); - int (*p_read)(struct cfs_psdev_file *, char *, unsigned long); - int (*p_write)(struct cfs_psdev_file *, char *, unsigned long); - int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *); -}; - -/* - * generic time manipulation functions. 
- */ - -static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2) -{ - return cfs_time_before(t2, t1); -} - -static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2) -{ - return cfs_time_beforeq(t2, t1); -} - -/* - * return seconds since UNIX epoch - */ -static inline time_t cfs_unix_seconds(void) -{ - cfs_fs_time_t t; - - cfs_fs_time_current(&t); - return (time_t)cfs_fs_time_sec(&t); -} - -static inline cfs_time_t cfs_time_shift(int seconds) -{ - return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds)); -} - -static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small, - struct timeval *result) -{ - long r = (long) ( - (large->tv_sec - small->tv_sec) * ONE_MILLION + - (large->tv_usec - small->tv_usec)); - if (result != NULL) { - result->tv_usec = r % ONE_MILLION; - result->tv_sec = r / ONE_MILLION; - } - return r; -} - -#define CFS_RATELIMIT(seconds) \ -({ \ - /* \ - * XXX nikita: non-portable initializer \ - */ \ - static time_t __next_message = 0; \ - int result; \ - \ - if (cfs_time_after(cfs_time_current(), __next_message)) \ - result = 1; \ - else { \ - __next_message = cfs_time_shift(seconds); \ - result = 0; \ - } \ - result; \ -}) - -struct libcfs_debug_msg_data { - cfs_debug_limit_state_t *msg_cdls; - int msg_subsys; - const char *msg_file; - const char *msg_fn; - int msg_line; -}; - -#define DEBUG_MSG_DATA_INIT(cdls, subsystem, file, func, ln ) { \ - .msg_cdls = (cdls), \ - .msg_subsys = (subsystem), \ - .msg_file = (file), \ - .msg_fn = (func), \ - .msg_line = (ln) \ - } - - -extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, - int subsys, int mask, - const char *file, const char *fn, const int line, - const char *format1, va_list args, - const char *format2, ...) 
- __attribute__ ((format (printf, 9, 10))); - -#define libcfs_debug_vmsg(cdls, subsys, mask, file, fn, line, format, args) \ - libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,format,args,NULL,NULL) - -#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, a...) \ - libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ##a) - -#define cdebug_va(cdls, mask, file, func, line, fmt, args) do { \ - CHECK_STACK(); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - libcfs_debug_vmsg(cdls, DEBUG_SUBSYSTEM, (mask), \ - (file), (func), (line), fmt, args); \ -} while(0); - -#define cdebug(cdls, mask, file, func, line, fmt, a...) do { \ - CHECK_STACK(); \ - \ - if (((mask) & D_CANTMASK) != 0 || \ - ((libcfs_debug & (mask)) != 0 && \ - (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ - libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, (mask), \ - (file), (func), (line), fmt, ## a); \ -} while(0); - -extern void libcfs_assertion_failed(const char *expr, const char *file, - const char *fn, const int line); - -static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg) -{ - if (cfs_time_after(cfs_time_current(), - cfs_time_add(now, cfs_time_seconds(15)))) - CERROR("slow %s %lu sec\n", msg, - cfs_duration_sec(cfs_time_sub(cfs_time_current(),now))); -} - -/* - * helper function similar to do_gettimeofday() of Linux kernel - */ -static inline void cfs_fs_timeval(struct timeval *tv) -{ - cfs_fs_time_t time; - - cfs_fs_time_current(&time); - cfs_fs_time_usec(&time, tv); -} - -/* - * return valid time-out based on user supplied one. Currently we only check - * that time-out is not shorted than allowed. 
- */ -static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout) -{ - if (timeout < CFS_TICK) - timeout = CFS_TICK; - return timeout; -} - -/* - * Universal memory allocator API - */ -enum cfs_alloc_flags { - /* allocation is not allowed to block */ - CFS_ALLOC_ATOMIC = (1 << 0), - /* allocation is allowed to block */ - CFS_ALLOC_WAIT = (1 << 1), - /* allocation should return zeroed memory */ - CFS_ALLOC_ZERO = (1 << 2), - /* allocation is allowed to call file-system code to free/clean - * memory */ - CFS_ALLOC_FS = (1 << 3), - /* allocation is allowed to do io to free/clean memory */ - CFS_ALLOC_IO = (1 << 4), - /* don't report allocation failure to the console */ - CFS_ALLOC_NOWARN = (1 << 5), - /* standard allocator flag combination */ - CFS_ALLOC_STD = CFS_ALLOC_FS | CFS_ALLOC_IO, - CFS_ALLOC_USER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO, -}; - -/* flags for cfs_page_alloc() in addition to enum cfs_alloc_flags */ -enum cfs_alloc_page_flags { - /* allow to return page beyond KVM. It has to be mapped into KVM by - * cfs_page_map(); */ - CFS_ALLOC_HIGH = (1 << 5), - CFS_ALLOC_HIGHUSER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO | CFS_ALLOC_HIGH, -}; - -/* - * portable UNIX device file identification. (This is not _very_ - * portable. Probably makes no sense for Windows.) - */ -/* - * Platform defines - * - * cfs_rdev_t - */ - -typedef unsigned int cfs_major_nr_t; -typedef unsigned int cfs_minor_nr_t; - -/* - * Defined by platform. - */ -cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor); -cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev); -cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev); - -/* - * Generic on-wire rdev format. - */ - -typedef __u32 cfs_wire_rdev_t; - -cfs_wire_rdev_t cfs_wire_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor); -cfs_major_nr_t cfs_wire_rdev_major(cfs_wire_rdev_t rdev); -cfs_minor_nr_t cfs_wire_rdev_minor(cfs_wire_rdev_t rdev); - -/* - * Drop into debugger, if possible. 
Implementation is provided by platform. - */ - -void cfs_enter_debugger(void); - -/* - * Defined by platform - */ -void cfs_daemonize(char *str); -int cfs_daemonize_ctxt(char *str); -cfs_sigset_t cfs_get_blocked_sigs(void); -cfs_sigset_t cfs_block_allsigs(void); -cfs_sigset_t cfs_block_sigs(cfs_sigset_t bits); -void cfs_restore_sigs(cfs_sigset_t); -int cfs_signal_pending(void); -void cfs_clear_sigpending(void); -/* - * XXX Liang: - * these macros should be removed in the future, - * we keep them just for keeping libcfs compatible - * with other branches. - */ -#define libcfs_daemonize(s) cfs_daemonize(s) -#define cfs_sigmask_lock(f) do { f= 0; } while (0) -#define cfs_sigmask_unlock(f) do { f= 0; } while (0) - -int convert_server_error(__u64 ecode); -int convert_client_oflag(int cflag, int *result); - -/* - * Stack-tracing filling. - */ - -/* - * Platform-dependent data-type to hold stack frames. - */ -struct cfs_stack_trace; - -/* - * Fill @trace with current back-trace. - */ -void cfs_stack_trace_fill(struct cfs_stack_trace *trace); - -/* - * Return instruction pointer for frame @frame_no. NULL if @frame_no is - * invalid. - */ -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no); - -/* - * Universal open flags. 
- */ -#define CFS_O_ACCMODE 0003 -#define CFS_O_CREAT 0100 -#define CFS_O_EXCL 0200 -#define CFS_O_NOCTTY 0400 -#define CFS_O_TRUNC 01000 -#define CFS_O_APPEND 02000 -#define CFS_O_NONBLOCK 04000 -#define CFS_O_NDELAY CFS_O_NONBLOCK -#define CFS_O_SYNC 010000 -#define CFS_O_ASYNC 020000 -#define CFS_O_DIRECT 040000 -#define CFS_O_LARGEFILE 0100000 -#define CFS_O_DIRECTORY 0200000 -#define CFS_O_NOFOLLOW 0400000 -#define CFS_O_NOATIME 01000000 - -/* convert local open flags to universal open flags */ -int cfs_oflags2univ(int flags); -/* convert universal open flags to local open flags */ -int cfs_univ2oflags(int flags); - -#define _LIBCFS_H - -#endif /* _LIBCFS_H */ diff --git a/lnet/include/libcfs/linux/.cvsignore b/lnet/include/libcfs/linux/.cvsignore deleted file mode 100644 index 3dda72986fc5af262451a760393b3a7065938c80..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile.in -Makefile diff --git a/lnet/include/libcfs/linux/Makefile.am b/lnet/include/libcfs/linux/Makefile.am deleted file mode 100644 index 072a7ad3c5a5112e495dfd4159fe7503c2b97596..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -EXTRA_DIST := kp30.h libcfs.h linux-fs.h linux-lock.h linux-mem.h \ - linux-prim.h linux-time.h linux-tcpip.h lltrace.h \ - portals_compat25.h portals_utils.h diff --git a/lnet/include/libcfs/linux/kp30.h b/lnet/include/libcfs/linux/kp30.h deleted file mode 100644 index a1e3b7c53f4e833b9fac4c038b989edf3f3e385c..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/kp30.h +++ /dev/null @@ -1,349 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LINUX_KP30_H__ -#define __LIBCFS_LINUX_KP30_H__ - -#ifndef __LIBCFS_KP30_H__ -#error Do not #include this file directly. 
#include <libcfs/kp30.h> instead -#endif - -#ifdef __KERNEL__ -#ifdef HAVE_KERNEL_CONFIG_H -# include <linux/config.h> -#endif -# include <linux/kernel.h> -# include <linux/mm.h> -# include <linux/string.h> -# include <linux/stat.h> -# include <linux/init.h> -# include <linux/errno.h> -# include <linux/unistd.h> -# include <asm/system.h> -# include <linux/kmod.h> -# include <linux/notifier.h> -# include <linux/fs.h> -# include <asm/segment.h> -# include <linux/miscdevice.h> -# include <linux/vmalloc.h> -# include <linux/time.h> -# include <linux/slab.h> -# include <linux/interrupt.h> -# include <linux/highmem.h> -# include <linux/module.h> -# include <linux/version.h> -# include <lnet/lnet.h> -# include <linux/smp_lock.h> -# include <asm/atomic.h> -# include <asm/uaccess.h> -# include <linux/rwsem.h> -# include <linux/proc_fs.h> -# include <linux/file.h> -# include <linux/smp.h> -# include <linux/ctype.h> -# include <linux/compiler.h> -# ifdef HAVE_MM_INLINE -# include <linux/mm_inline.h> -# endif -# if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -# include <linux/kallsyms.h> -# include <linux/moduleparam.h> -# endif - -#include <libcfs/linux/portals_compat25.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define schedule_work schedule_task -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_TQUEUE((wq), 0, 0); \ - PREPARE_TQUEUE((wq), (cb), (cbdata)); \ -} while (0) - -#define PageUptodate Page_Uptodate -#define our_recalc_sigpending(current) recalc_sigpending(current) -#define num_online_cpus() smp_num_cpus -static inline void our_cond_resched(void) -{ - if (current->need_resched) - schedule (); -} -#define work_struct_t struct tq_struct - -#else - -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ -} while (0) -#define wait_on_page wait_on_page_locked -#define our_recalc_sigpending(current) recalc_sigpending() -#define strtok(a,b) strpbrk(a, b) -static inline void our_cond_resched(void) -{ - 
cond_resched(); -} -#define work_struct_t struct work_struct - -#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ - -#ifdef CONFIG_SMP -#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock)) -#else -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) -#endif -#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0) - -#define LIBCFS_PANIC(msg) panic(msg) - -/* ------------------------------------------------------------------- */ - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x)) -#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) - -#define PORTAL_MODULE_USE MOD_INC_USE_COUNT -#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT -#else - -#define PORTAL_SYMBOL_REGISTER(x) -#define PORTAL_SYMBOL_UNREGISTER(x) - -#define PORTAL_SYMBOL_GET(x) symbol_get(x) -#define PORTAL_SYMBOL_PUT(x) symbol_put(x) - -#define PORTAL_MODULE_USE try_module_get(THIS_MODULE) -#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE) - -#endif - -/******************************************************************************/ -/* Module parameter support */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# define CFS_MODULE_PARM(name, t, type, perm, desc) \ - MODULE_PARM(name, t);\ - MODULE_PARM_DESC(name, desc) - -#else -# define CFS_MODULE_PARM(name, t, type, perm, desc) \ - module_param(name, type, perm);\ - MODULE_PARM_DESC(name, desc) -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) -# define CFS_SYSFS_MODULE_PARM 0 /* no sysfs module parameters */ -#else -# define CFS_SYSFS_MODULE_PARM 1 /* module parameters accessible via sysfs */ -#endif -/******************************************************************************/ - -#if (__GNUC__) -/* Use the special GNU C __attribute__ hack to have the compiler check the - * printf style argument string against 
the actual argument count and - * types. - */ -#ifdef printf -# warning printf has been defined as a macro... -# undef printf -#endif - -#endif /* __GNUC__ */ - -# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b) -# define printf(format, b...) CDEBUG(D_OTHER, format , ## b) -# define time(a) CURRENT_TIME - -#else /* !__KERNEL__ */ -# include <stdio.h> -# include <stdlib.h> -#ifdef CRAY_XT3 -# include <ioctl.h> -#elif defined(__CYGWIN__) -# include <cygwin-ioctl.h> -#else -# include <stdint.h> -#endif -# include <unistd.h> -# include <time.h> -# include <limits.h> -# include <errno.h> -# include <sys/ioctl.h> /* for _IOWR */ - -# define CFS_MODULE_PARM(name, t, type, perm, desc) -#define PORTAL_SYMBOL_GET(x) inter_module_get(#x) -#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) - -#endif /* End of !__KERNEL__ */ - -/******************************************************************************/ -/* Light-weight trace - * Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 0 - -#define LWT_MEMORY (16<<20) - -#ifndef KLWT_SUPPORT -# if defined(__KERNEL__) -# if !defined(BITS_PER_LONG) -# error "BITS_PER_LONG not defined" -# endif -# elif !defined(__WORDSIZE) -# error "__WORDSIZE not defined" -# else -# define BITS_PER_LONG __WORDSIZE -# endif - -/* kernel hasn't defined this? 
*/ -typedef struct { - long long lwte_when; - char *lwte_where; - void *lwte_task; - long lwte_p1; - long lwte_p2; - long lwte_p3; - long lwte_p4; -# if BITS_PER_LONG > 32 - long lwte_pad; -# endif -} lwt_event_t; -#endif /* !KLWT_SUPPORT */ - -#if LWT_SUPPORT -# ifdef __KERNEL__ -# if !KLWT_SUPPORT - -typedef struct _lwt_page { - struct list_head lwtp_list; - struct page *lwtp_page; - lwt_event_t *lwtp_events; -} lwt_page_t; - -typedef struct { - int lwtc_current_index; - lwt_page_t *lwtc_current_page; -} lwt_cpu_t; - -extern int lwt_enabled; -extern lwt_cpu_t lwt_cpus[]; - -/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. - * This stuff is meant for finding specific problems; it never stays in - * production code... */ - -#define LWTSTR(n) #n -#define LWTWHERE(f,l) f ":" LWTSTR(l) -#define LWT_EVENTS_PER_PAGE (CFS_PAGE_SIZE / sizeof (lwt_event_t)) - -#define LWT_EVENT(p1, p2, p3, p4) \ -do { \ - unsigned long flags; \ - lwt_cpu_t *cpu; \ - lwt_page_t *p; \ - lwt_event_t *e; \ - \ - if (lwt_enabled) { \ - local_irq_save (flags); \ - \ - cpu = &lwt_cpus[smp_processor_id()]; \ - p = cpu->lwtc_current_page; \ - e = &p->lwtp_events[cpu->lwtc_current_index++]; \ - \ - if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \ - cpu->lwtc_current_page = \ - list_entry (p->lwtp_list.next, \ - lwt_page_t, lwtp_list); \ - cpu->lwtc_current_index = 0; \ - } \ - \ - e->lwte_when = get_cycles(); \ - e->lwte_where = LWTWHERE(__FILE__,__LINE__); \ - e->lwte_task = current; \ - e->lwte_p1 = (long)(p1); \ - e->lwte_p2 = (long)(p2); \ - e->lwte_p3 = (long)(p3); \ - e->lwte_p4 = (long)(p4); \ - \ - local_irq_restore (flags); \ - } \ -} while (0) - -#endif /* !KLWT_SUPPORT */ - -extern int lwt_init (void); -extern void lwt_fini (void); -extern int lwt_lookup_string (int *size, char *knlptr, - char *usrptr, int usrsize); -extern int lwt_control (int enable, int clear); -extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, - void *user_ptr, int 
user_size); -# else /* __KERNEL__ */ -# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */ -# endif /* __KERNEL__ */ -#endif /* LWT_SUPPORT */ - -/* ------------------------------------------------------------------ */ - -#define IOCTL_LIBCFS_TYPE long - -#ifdef __CYGWIN__ -# ifndef BITS_PER_LONG -# if (~0UL) == 0xffffffffUL -# define BITS_PER_LONG 32 -# else -# define BITS_PER_LONG 64 -# endif -# endif -#endif - -#if BITS_PER_LONG > 32 -# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) -# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) -# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) -#else -# define LI_POISON ((int)0x5a5a5a5a) -# define LL_POISON ((long)0x5a5a5a5a) -# define LP_POISON ((void *)(long)0x5a5a5a5a) -#endif - -/* this is a bit chunky */ - -#if defined(__KERNEL__) - #define _LWORDSIZE BITS_PER_LONG -#else - #define _LWORDSIZE __WORDSIZE -#endif - -#if (defined(__x86_64__) && (defined(__KERNEL__) || defined(CRAY_XT3))) -/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */ -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPF64 "L" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#elif (_LWORDSIZE == 32) -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPF64 "L" -# define LPSZ "%u" -# define LPSSZ "%d" -#elif (_LWORDSIZE == 64) -# define LPU64 "%lu" -# define LPD64 "%ld" -# define LPX64 "%#lx" -# define LPF64 "l" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#endif -#ifndef LPU64 -# error "No word size defined" -#endif - -#undef _LWORDSIZE - -#endif diff --git a/lnet/include/libcfs/linux/libcfs.h b/lnet/include/libcfs/linux/libcfs.h deleted file mode 100644 index 0aac9194156a51040dee579f97616dd06ac92c90..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/libcfs.h +++ /dev/null @@ -1,157 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LINUX_LIBCFS_H__ -#define 
__LIBCFS_LINUX_LIBCFS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#include <stdarg.h> -#include <libcfs/linux/linux-mem.h> -#include <libcfs/linux/linux-time.h> -#include <libcfs/linux/linux-prim.h> -#include <libcfs/linux/linux-lock.h> -#include <libcfs/linux/linux-fs.h> -#include <libcfs/linux/linux-tcpip.h> - -#ifdef HAVE_ASM_TYPES_H -#include <asm/types.h> -#else -#include <libcfs/types.h> -#endif - - -#ifdef __KERNEL__ -# include <linux/types.h> -# include <linux/time.h> -# include <asm/timex.h> -#else -# include <sys/types.h> -# include <sys/time.h> -# define do_gettimeofday(tv) gettimeofday(tv, NULL); -typedef unsigned long long cycles_t; -#endif - -#ifndef __KERNEL__ -/* Userpace byte flipping */ -# include <endian.h> -# include <byteswap.h> -# define __swab16(x) bswap_16(x) -# define __swab32(x) bswap_32(x) -# define __swab64(x) bswap_64(x) -# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0) -# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0) -# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0) -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define le16_to_cpu(x) (x) -# define cpu_to_le16(x) (x) -# define le32_to_cpu(x) (x) -# define cpu_to_le32(x) (x) -# define le64_to_cpu(x) (x) -# define cpu_to_le64(x) (x) -# else -# if __BYTE_ORDER == __BIG_ENDIAN -# define le16_to_cpu(x) bswap_16(x) -# define cpu_to_le16(x) bswap_16(x) -# define le32_to_cpu(x) bswap_32(x) -# define cpu_to_le32(x) bswap_32(x) -# define le64_to_cpu(x) bswap_64(x) -# define cpu_to_le64(x) bswap_64(x) -# else -# error "Unknown byte order" -# endif /* __BIG_ENDIAN */ -# endif /* __LITTLE_ENDIAN */ -#endif /* ! 
__KERNEL__ */ - -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u32 ph_cpu_id; - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); - -#ifdef __KERNEL__ -# include <linux/sched.h> /* THREAD_SIZE */ -#else -# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */ -# define THREAD_SIZE 8192 -# endif -#endif - -#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) - -#if defined(__KERNEL__) && !defined(__x86_64__) -# ifdef __ia64__ -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -# else -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_frame_address(0) & \ - (THREAD_SIZE - 1))) -# endif /* __ia64__ */ - -#define __CHECK_STACK(file, func, line) \ -do { \ - unsigned long _stack = CDEBUG_STACK(); \ - \ - if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \ - libcfs_stack = _stack; \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \ - file, func, line, \ - "maximum lustre stack %lu\n", _stack); \ - /*panic("LBUG");*/ \ - } \ -} while (0) -#define CHECK_STACK() __CHECK_STACK(__FILE__, __func__, __LINE__) -#else /* !__KERNEL__ */ -#define __CHECK_STACK(X, Y, Z) do { } while(0) -#define CHECK_STACK() do { } while(0) -#define CDEBUG_STACK() (0L) -#endif /* __KERNEL__ */ - -/* initial pid */ -#define LUSTRE_LNET_PID 12345 - -#define ENTRY_NESTING_SUPPORT (1) -#define ENTRY_NESTING do {;} while (0) -#define EXIT_NESTING do {;} while (0) -#define __current_nesting_level() (0) - -/* - * Platform specific declarations for cfs_curproc API (libcfs/curproc.h) - * - * Implementation is in linux-curproc.c - */ -#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm) - -#if defined(__KERNEL__) -#include <linux/capability.h> -typedef kernel_cap_t cfs_kernel_cap_t; -#else -typedef __u32 cfs_kernel_cap_t; -#endif - -#if defined(__KERNEL__) -/* - * No 
stack-back-tracing in Linux for now. - */ -struct cfs_stack_trace { -}; - -#ifndef WITH_WATCHDOG -#define WITH_WATCHDOG -#endif - -#endif - -#endif /* _LINUX_LIBCFS_H */ diff --git a/lnet/include/libcfs/linux/linux-fs.h b/lnet/include/libcfs/linux/linux-fs.h deleted file mode 100644 index 3ba54611e4991151a6139a71eea80ee881634365..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-fs.h +++ /dev/null @@ -1,87 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_FS_H__ -#define __LIBCFS_LINUX_CFS_FS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/mount.h> -#else /* !__KERNEL__ */ -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> -#include <sys/mount.h> -#include <mntent.h> -#endif /* __KERNEL__ */ - -typedef struct file cfs_file_t; -typedef struct dentry cfs_dentry_t; - -#ifdef __KERNEL__ -#define cfs_filp_size(f) ((f)->f_dentry->d_inode->i_size) -#define cfs_filp_poff(f) (&(f)->f_pos) - -/* - * XXX Do we need to parse flags and mode in cfs_filp_open? - */ -cfs_file_t *cfs_filp_open (const char *name, int flags, int mode, int *err); -#define cfs_filp_close(f) filp_close(f, NULL) -#define cfs_filp_read(fp, buf, size, pos) (fp)->f_op->read((fp), (buf), (size), pos) -#define cfs_filp_write(fp, buf, size, pos) (fp)->f_op->write((fp), (buf), (size), pos) -#define cfs_filp_fsync(fp) (fp)->f_op->fsync((fp), (fp)->f_dentry, 1) - -#define cfs_get_file(f) get_file(f) -#define cfs_put_file(f) fput(f) -#define cfs_file_count(f) file_count(f) - -typedef struct file_lock cfs_flock_t; -#define cfs_flock_type(fl) ((fl)->fl_type) -#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0) -#define cfs_flock_pid(fl) ((fl)->fl_pid) -#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0) -#define cfs_flock_start(fl) ((fl)->fl_start) -#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0) -#define cfs_flock_end(fl) ((fl)->fl_end) -#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0) - -ssize_t cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset); - -/* - * portable UNIX device file identification. 
- */ - -typedef dev_t cfs_rdev_t; - -#endif - -#endif diff --git a/lnet/include/libcfs/linux/linux-lock.h b/lnet/include/libcfs/linux/linux-lock.h deleted file mode 100644 index f419c9b5a776b01ea7dff6d842c0fa673ec2e588..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-lock.h +++ /dev/null @@ -1,104 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_LOCK_H__ -#define __LIBCFS_LINUX_CFS_LOCK_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <linux/smp_lock.h> - -/* - * IMPORTANT !!!!!!!! - * - * All locks' declaration are not guaranteed to be initialized, - * Althought some of they are initialized in Linux. All locks - * declared by CFS_DECL_* should be initialized explicitly. 
- */ - - -/* - * spin_lock (use Linux kernel's primitives) - * - * - spin_lock_init(x) - * - spin_lock(x) - * - spin_unlock(x) - * - spin_trylock(x) - * - * - spin_lock_irqsave(x, f) - * - spin_unlock_irqrestore(x, f) - */ - -/* - * rw_semaphore (use Linux kernel's primitives) - * - * - init_rwsem(x) - * - down_read(x) - * - up_read(x) - * - down_write(x) - * - up_write(x) - */ - -/* - * rwlock_t (use Linux kernel's primitives) - * - * - rwlock_init(x) - * - read_lock(x) - * - read_unlock(x) - * - write_lock(x) - * - write_unlock(x) - */ - -/* - * mutex: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ -#define init_mutex(x) init_MUTEX(x) -#define init_mutex_locked(x) init_MUTEX_LOCKED(x) -#define mutex_up(x) up(x) -#define mutex_down(x) down(x) - -/* - * completion (use Linux kernel's primitives) - * - * - init_complition(c) - * - complete(c) - * - wait_for_completion(c) - */ - -/* __KERNEL__ */ -#else - -#include "../user-lock.h" - -/* __KERNEL__ */ -#endif -#endif diff --git a/lnet/include/libcfs/linux/linux-mem.h b/lnet/include/libcfs/linux/linux-mem.h deleted file mode 100644 index 7591213ffbe6d14a466447f7daf84f90e24af4ef..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-mem.h +++ /dev/null @@ -1,121 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_MEM_H__ -#define __LIBCFS_LINUX_CFS_MEM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -# include <linux/mm.h> -# include <linux/vmalloc.h> -# include <linux/pagemap.h> -# include <linux/slab.h> -# ifdef HAVE_MM_INLINE -# include <linux/mm_inline.h> -# endif - -typedef struct page cfs_page_t; -#define CFS_PAGE_SIZE PAGE_CACHE_SIZE -#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT -#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) - -cfs_page_t *cfs_alloc_page(unsigned int flags); -#define cfs_free_page(p) __free_pages(p, 0) - -static inline void *cfs_page_address(cfs_page_t *page) -{ - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - return page_address(page); -} - -static inline void *cfs_kmap(cfs_page_t *page) -{ - return kmap(page); -} - -static inline void cfs_kunmap(cfs_page_t *page) -{ - kunmap(page); -} - -static inline void cfs_get_page(cfs_page_t *page) -{ - get_page(page); -} - -static inline int cfs_page_count(cfs_page_t *page) -{ - return page_count(page); -} - -#define cfs_page_index(p) ((p)->index) - -/* - * Memory allocator - * XXX Liang: move these declare to public file - */ -extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); -extern void cfs_free(void *addr); - -extern void *cfs_alloc_large(size_t nr_bytes); -extern void cfs_free_large(void *addr); - -/* - * In Linux there is no way to determine whether current execution context is - * blockable. 
- */ -#define CFS_ALLOC_ATOMIC_TRY CFS_ALLOC_ATOMIC - -/* - * SLAB allocator - * XXX Liang: move these declare to public file - */ -typedef kmem_cache_t cfs_mem_cache_t; -extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long); -extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); -extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); -extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); - -/* - */ -#define CFS_DECL_MMSPACE mm_segment_t __oldfs -#define CFS_MMSPACE_OPEN do { __oldfs = get_fs(); set_fs(get_ds());} while(0) -#define CFS_MMSPACE_CLOSE set_fs(__oldfs) - -#else /* !__KERNEL__ */ -#ifdef HAVE_ASM_PAGE_H -#include <asm/page.h> /* needed for PAGE_SIZE - rread */ -#endif - -#include <libcfs/user-prim.h> -/* __KERNEL__ */ -#endif - -#endif /* __LINUX_CFS_MEM_H__ */ diff --git a/lnet/include/libcfs/linux/linux-prim.h b/lnet/include/libcfs/linux/linux-prim.h deleted file mode 100644 index 41eeb8adeb068c113db058cf76c8e54866a8d36b..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-prim.h +++ /dev/null @@ -1,205 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_PRIM_H__ -#define __LIBCFS_LINUX_CFS_PRIM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/version.h> -#include <linux/proc_fs.h> -#include <linux/mm.h> -#include <linux/timer.h> - -#include <linux/miscdevice.h> -#include <libcfs/linux/portals_compat25.h> -#include <asm/div64.h> - -#include <libcfs/linux/linux-time.h> - -/* - * Pseudo device register - */ -typedef struct miscdevice cfs_psdev_t; -#define cfs_psdev_register(dev) misc_register(dev) -#define cfs_psdev_deregister(dev) misc_deregister(dev) - -/* - * Sysctl register - */ -typedef struct ctl_table cfs_sysctl_table_t; -typedef struct ctl_table_header cfs_sysctl_table_header_t; - -#define cfs_register_sysctl_table(t, a) register_sysctl_table(t, a) -#define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t, a) - -/* - * Symbol register - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define cfs_symbol_register(s, p) inter_module_register(s, THIS_MODULE, p) -#define cfs_symbol_unregister(s) inter_module_unregister(s) -#define cfs_symbol_get(s) inter_module_get(s) -#define cfs_symbol_put(s) inter_module_put(s) -#define cfs_module_get() MOD_INC_USE_COUNT -#define cfs_module_put() MOD_DEC_USE_COUNT -#else -#define cfs_symbol_register(s, p) do {} while(0) -#define cfs_symbol_unregister(s) do {} while(0) -#define cfs_symbol_get(s) symbol_get(s) -#define cfs_symbol_put(s) symbol_put(s) -#define cfs_module_get() try_module_get(THIS_MODULE) -#define cfs_module_put() module_put(THIS_MODULE) -#endif - -/* - * Proc file system APIs - */ -typedef read_proc_t cfs_read_proc_t; -typedef write_proc_t cfs_write_proc_t; -typedef struct proc_dir_entry cfs_proc_dir_entry_t; -#define cfs_create_proc_entry(n, m, p) 
create_proc_entry(n, m, p) -#define cfs_free_proc_entry(e) free_proc_entry(e) -#define cfs_remove_proc_entry(n, e) remove_proc_entry(n, e) - -/* - * Wait Queue - */ -#define CFS_TASK_INTERRUPTIBLE TASK_INTERRUPTIBLE -#define CFS_TASK_UNINT TASK_UNINTERRUPTIBLE - -typedef wait_queue_t cfs_waitlink_t; -typedef wait_queue_head_t cfs_waitq_t; - -typedef long cfs_task_state_t; - -#define cfs_waitq_init(w) init_waitqueue_head(w) -#define cfs_waitlink_init(l) init_waitqueue_entry(l, current) -#define cfs_waitq_add(w, l) add_wait_queue(w, l) -#define cfs_waitq_add_exclusive(w, l) add_wait_queue_exclusive(w, l) -#define cfs_waitq_forward(l, w) do {} while(0) -#define cfs_waitq_del(w, l) remove_wait_queue(w, l) -#define cfs_waitq_active(w) waitqueue_active(w) -#define cfs_waitq_signal(w) wake_up(w) -#define cfs_waitq_signal_nr(w,n) wake_up_nr(w, n) -#define cfs_waitq_broadcast(w) wake_up_all(w) -#define cfs_waitq_wait(l, s) schedule() -#define cfs_waitq_timedwait(l, s, t) schedule_timeout(t) -#define cfs_schedule_timeout(s, t) schedule_timeout(t) -#define cfs_schedule() schedule() - -/* Kernel thread */ -typedef int (*cfs_thread_t)(void *); -#define cfs_kernel_thread(func, a, f) kernel_thread(func, a, f) - -/* - * Task struct - */ -typedef struct task_struct cfs_task_t; -#define cfs_current() current -#define cfs_task_lock(t) task_lock(t) -#define cfs_task_unlock(t) task_unlock(t) -#define CFS_DECL_JOURNAL_DATA void *journal_info -#define CFS_PUSH_JOURNAL do { \ - journal_info = current->journal_info; \ - current->journal_info = NULL; \ - } while(0) -#define CFS_POP_JOURNAL do { \ - current->journal_info = journal_info; \ - } while(0) - -/* Module interfaces */ -#define cfs_module(name, version, init, fini) \ -module_init(init); \ -module_exit(fini) - -/* - * Signal - */ -typedef sigset_t cfs_sigset_t; - -/* - * Timer - */ -typedef struct timer_list cfs_timer_t; -typedef void (*timer_func_t)(unsigned long); - -#define cfs_init_timer(t) init_timer(t) - -static inline void 
cfs_timer_init(cfs_timer_t *t, void (*func)(unsigned long), void *arg) -{ - init_timer(t); - t->function = (timer_func_t)func; - t->data = (unsigned long)arg; -} - -static inline void cfs_timer_done(cfs_timer_t *t) -{ - return; -} - -static inline void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline) -{ - mod_timer(t, deadline); -} - -static inline void cfs_timer_disarm(cfs_timer_t *t) -{ - del_timer(t); -} - -static inline int cfs_timer_is_armed(cfs_timer_t *t) -{ - return timer_pending(t); -} - -static inline cfs_time_t cfs_timer_deadline(cfs_timer_t *t) -{ - return t->expires; -} - - -/* deschedule for a bit... */ -static inline void cfs_pause(cfs_duration_t ticks) -{ - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(ticks); -} - -#else /* !__KERNEL__ */ - -typedef struct proc_dir_entry cfs_proc_dir_entry_t; -#include "../user-prim.h" - -#endif /* __KERNEL__ */ - -#endif diff --git a/lnet/include/libcfs/linux/linux-tcpip.h b/lnet/include/libcfs/linux/linux-tcpip.h deleted file mode 100644 index 2d149044f8a38090a792ddf718281ead21d45ea9..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-tcpip.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_LINUX_CFS_TCP_H__ -#define __LIBCFS_LINUX_CFS_TCP_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ -#include <net/sock.h> - -typedef struct socket cfs_socket_t; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72)) -# define sk_allocation allocation -# define sk_data_ready data_ready -# define sk_write_space write_space -# define sk_user_data user_data -# define sk_prot prot -# define sk_sndbuf sndbuf -# define sk_rcvbuf rcvbuf -# define sk_socket socket -# define sk_sleep sleep -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -# define sk_wmem_queued wmem_queued -# define sk_err err -# define sk_route_caps route_caps -#endif - -#define SOCK_SNDBUF(so) ((so)->sk->sk_sndbuf) -#define SOCK_WMEM_QUEUED(so) ((so)->sk->sk_wmem_queued) -#define SOCK_ERROR(so) ((so)->sk->sk_err) -#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags) - -#endif - -#endif diff --git a/lnet/include/libcfs/linux/linux-time.h b/lnet/include/libcfs/linux/linux-time.h deleted file mode 100644 index e928387a795df7d50b2762fd4d90735950d939e5..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/linux-time.h +++ /dev/null @@ -1,316 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for Linux (kernel and user-level). - * - */ - -#ifndef __LIBCFS_LINUX_LINUX_TIME_H__ -#define __LIBCFS_LINUX_LINUX_TIME_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Portable time API */ - -/* - * Platform provides three opaque data-types: - * - * cfs_time_t represents point in time. This is internal kernel - * time rather than "wall clock". This time bears no - * relation to gettimeofday(). - * - * cfs_duration_t represents time interval with resolution of internal - * platform clock - * - * cfs_fs_time_t represents instance in world-visible time. 
This is - * used in file-system time-stamps - * - * cfs_time_t cfs_time_current(void); - * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); - * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); - * int cfs_time_before (cfs_time_t, cfs_time_t); - * int cfs_time_beforeq(cfs_time_t, cfs_time_t); - * - * cfs_duration_t cfs_duration_build(int64_t); - * - * time_t cfs_duration_sec (cfs_duration_t); - * void cfs_duration_usec(cfs_duration_t, struct timeval *); - * void cfs_duration_nsec(cfs_duration_t, struct timespec *); - * - * void cfs_fs_time_current(cfs_fs_time_t *); - * time_t cfs_fs_time_sec (cfs_fs_time_t *); - * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); - * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); - * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); - * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); - * - * CFS_TIME_FORMAT - * CFS_DURATION_FORMAT - * - */ - -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION 1000000 - -#ifdef __KERNEL__ -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/version.h> -#include <linux/time.h> -#include <asm/div64.h> - -#include <libcfs/linux/portals_compat25.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -/* - * old kernels---CURRENT_TIME is struct timeval - */ -typedef struct timeval cfs_fs_time_t; - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - *v = *t; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - s->tv_sec = t->tv_sec; - s->tv_nsec = t->tv_usec * 1000; -} - -/* - * internal helper function used by cfs_fs_time_before*() - */ -static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t) -{ - return (unsigned long long)t->tv_sec * ONE_MILLION + t->tv_usec; -} - -#define CURRENT_KERN_TIME xtime - -#else -/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */ - -/* - * post 2.5 kernels. 
- */ - -#include <linux/jiffies.h> - -typedef struct timespec cfs_fs_time_t; - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - v->tv_sec = t->tv_sec; - v->tv_usec = t->tv_nsec / 1000; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - *s = *t; -} - -/* - * internal helper function used by cfs_fs_time_before*() - */ -static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t) -{ - return (unsigned long long)t->tv_sec * ONE_BILLION + t->tv_nsec; -} - -#define CURRENT_KERN_TIME CURRENT_TIME - -/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */ -#endif - -/* - * Generic kernel stuff - */ - -typedef unsigned long cfs_time_t; /* jiffies */ -typedef long cfs_duration_t; - - -static inline cfs_time_t cfs_time_current(void) -{ - return jiffies; -} - -static inline time_t cfs_time_current_sec(void) -{ - return CURRENT_SECONDS; -} - -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return t + d; -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return t1 - t2; -} - -static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return time_before(t1, t2); -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return time_before_eq(t1, t2); -} - -static inline void cfs_fs_time_current(cfs_fs_time_t *t) -{ - *t = CURRENT_KERN_TIME; -} - -static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) -{ - return t->tv_sec; -} - -static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return __cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2); -} - -static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return __cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2); -} - -#if 0 -static inline cfs_duration_t cfs_duration_build(int64_t nano) -{ -#if (BITS_PER_LONG == 32) - /* We cannot use do_div(t, ONE_BILLION), do_div can only process - * 64 bits n and 32 bits base */ - int64_t t = nano * 
HZ; - do_div(t, 1000); - do_div(t, 1000000); - return (cfs_duration_t)t; -#else - return (nano * HZ / ONE_BILLION); -#endif -} -#endif - -static inline cfs_duration_t cfs_time_seconds(int seconds) -{ - return ((cfs_duration_t)seconds) * HZ; -} - -static inline time_t cfs_duration_sec(cfs_duration_t d) -{ - return d / HZ; -} - -static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) -{ -#if (BITS_PER_LONG == 32) && (HZ > 4096) - __u64 t; - - s->tv_sec = d / HZ; - t = (d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION; - s->tv_usec = do_div (t, HZ); -#else - s->tv_sec = d / HZ; - s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION) / HZ; -#endif -} - -static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) -{ -#if (BITS_PER_LONG == 32) - __u64 t; - - s->tv_sec = d / HZ; - t = (d - s->tv_sec * HZ) * ONE_BILLION; - s->tv_nsec = do_div (t, HZ); -#else - s->tv_sec = d / HZ; - s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ; -#endif -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) - -#define cfs_time_current_64 get_jiffies_64 - -static inline __u64 cfs_time_add_64(__u64 t, __u64 d) -{ - return t + d; -} - -static inline __u64 cfs_time_shift_64(int seconds) -{ - return cfs_time_add_64(cfs_time_current_64(), - cfs_time_seconds(seconds)); -} - -static inline int cfs_time_before_64(__u64 t1, __u64 t2) -{ - return (__s64)t2 - (__s64)t1 > 0; -} - -#else -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift -#define cfs_time_before_64 cfs_time_before - -#endif - -/* - * One jiffy - */ -#define CFS_TICK (1) - -#define CFS_TIME_T "%lu" -#define CFS_DURATION_T "%ld" - -#else /* !__KERNEL__ */ - -/* - * Liblustre. time(2) based implementation. 
- */ -#include <libcfs/user-time.h> -#endif /* __KERNEL__ */ - -/* __LIBCFS_LINUX_LINUX_TIME_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/linux/lltrace.h b/lnet/include/libcfs/linux/lltrace.h deleted file mode 100644 index 1ddd03d41a8ec6f6349ddcbd227b520e360dedc9..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/lltrace.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LINUX_LLTRACE_H__ -#define __LIBCFS_LINUX_LLTRACE_H__ - -#ifndef __LIBCFS_LLTRACE_H__ -#error Do not #include this file directly. #include <libcfs/lltrace.h> instead -#endif - -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/time.h> -#include <lnet/types.h> -#include <libcfs/kp30.h> -#include <lnet/lnetctl.h> -#include <linux/limits.h> -#include <asm/page.h> -#include <linux/version.h> - -#endif diff --git a/lnet/include/libcfs/linux/portals_compat25.h b/lnet/include/libcfs/linux/portals_compat25.h deleted file mode 100644 index 657c01155ec707378732fa99fd260a633487ec56..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/portals_compat25.h +++ /dev/null @@ -1,123 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__ -#define __LIBCFS_LINUX_PORTALS_COMPAT_H__ - -// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved -#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG -# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) -# define SIGNAL_MASK_ASSERT() \ - LASSERT(current->sighand->siglock.magic 
== SPINLOCK_MAGIC) -# else -# define SIGNAL_MASK_ASSERT() \ - LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC) -# endif -#else -# define SIGNAL_MASK_ASSERT() -#endif -// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sighand->siglock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sighand->siglock, flags) -# define USERMODEHELPER(path, argv, envp) \ - call_usermodehelper(path, argv, envp, 1) -# define RECALC_SIGPENDING recalc_sigpending() -# define CLEAR_SIGPENDING clear_tsk_thread_flag(current, \ - TIF_SIGPENDING) -# define CURRENT_SECONDS get_seconds() -# define smp_num_cpus num_online_cpus() - - -#elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */ - -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sighand->siglock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sighand->siglock, flags) -# define USERMODEHELPER(path, argv, envp) \ - call_usermodehelper(path, argv, envp) -# define RECALC_SIGPENDING recalc_sigpending() -# define CLEAR_SIGPENDING (current->sigpending = 0) -# define CURRENT_SECONDS CURRENT_TIME -# define wait_event_interruptible_exclusive(wq, condition) \ - wait_event_interruptible(wq, condition) - -#else /* 2.4.x */ - -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sigmask_lock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sigmask_lock, flags) -# define USERMODEHELPER(path, argv, envp) \ - call_usermodehelper(path, argv, envp) -# define RECALC_SIGPENDING recalc_sigpending(current) -# define CLEAR_SIGPENDING (current->sigpending = 0) -# define CURRENT_SECONDS CURRENT_TIME -# define wait_event_interruptible_exclusive(wq, condition) \ - wait_event_interruptible(wq, condition) - -#endif - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < 
KERNEL_VERSION(2,4,20)) -#define UML_PID(tsk) ((tsk)->thread.extern_pid) -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid) -#else -#define UML_PID(tsk) ((tsk)->pid) -#endif - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# define THREAD_NAME(comm, len, fmt, a...) \ - snprintf(comm, len,fmt"|%d", ## a, UML_PID(current)) -#else -# define THREAD_NAME(comm, len, fmt, a...) \ - snprintf(comm, len, fmt, ## a) -#endif - -#ifdef HAVE_PAGE_LIST -/* 2.4 alloc_page users can use page->list */ -#define PAGE_LIST_ENTRY list -#define PAGE_LIST(page) ((page)->list) -#else -/* 2.6 alloc_page users can use page->lru */ -#define PAGE_LIST_ENTRY lru -#define PAGE_LIST(page) ((page)->lru) -#endif - -#ifndef HAVE_CPU_ONLINE -#define cpu_online(cpu) ((1<<cpu) & (cpu_online_map)) -#endif -#ifndef HAVE_CPUMASK_T -typedef unsigned long cpumask_t; -#define cpu_set(cpu, map) set_bit(cpu, &(map)) -#define cpus_clear(map) memset(&(map), 0, sizeof(cpumask_t)) -#endif - -#ifndef __user -#define __user -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) -#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ - proc_dointvec(table, write, filp, buffer, lenp) -#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \ - proc_dostring(table, write, filp, buffer, lenp) -#define LL_PROC_PROTO(name) \ - name(ctl_table *table, int write, struct file *filp, \ - void __user *buffer, size_t *lenp) -#else -#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ - proc_dointvec(table, write, filp, buffer, lenp, ppos); -#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \ - proc_dostring(table, write, filp, buffer, lenp, ppos); -#define LL_PROC_PROTO(name) \ - name(ctl_table *table, int write, struct file *filp, \ - void __user *buffer, size_t *lenp, loff_t *ppos) -#endif - -#endif /* _PORTALS_COMPAT_H */ diff --git 
a/lnet/include/libcfs/linux/portals_utils.h b/lnet/include/libcfs/linux/portals_utils.h deleted file mode 100644 index ae319af8e7bdf5d80c807daf06813742de7f3fd9..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/linux/portals_utils.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef __LIBCFS_LINUX_PORTALS_UTILS_H__ -#define __LIBCFS_LINUX_PORTALS_UTILS_H__ - -#ifndef __LIBCFS_PORTALS_UTILS_H__ -#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead -#endif - -#ifdef __KERNEL__ -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/wait.h> -#include <linux/smp_lock.h> -#include <linux/poll.h> -#include <linux/random.h> - -#include <asm/unistd.h> -#include <asm/semaphore.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# include <linux/tqueue.h> -#else /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) */ -# include <linux/workqueue.h> -#endif -#include <libcfs/linux/linux-mem.h> -#include <libcfs/linux/linux-prim.h> -#else /* !__KERNEL__ */ - -#include <endian.h> -#include <libcfs/list.h> - -#ifdef HAVE_LINUX_VERSION_H -# include <linux/version.h> - -# if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# define BUG() /* workaround for module.h includes */ -# include <linux/module.h> -# endif -#endif /* !HAVE_LINUX_VERSION_H */ - -#ifndef __CYGWIN__ -# include <syscall.h> -#else /* __CYGWIN__ */ -# include <windows.h> -# include <windef.h> -# include <netinet/in.h> -#endif /* __CYGWIN__ */ - -#endif /* !__KERNEL__ */ -#endif diff --git a/lnet/include/libcfs/list.h b/lnet/include/libcfs/list.h deleted file mode 100644 index 5c27071d1b603dbe11ca184ea5ec625e58709f1d..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/list.h +++ /dev/null @@ -1,453 +0,0 @@ -#ifndef __LIBCFS_LIST_H__ -#define __LIBCFS_LIST_H__ - -#if defined (__linux__) && defined(__KERNEL__) - -#include <linux/list.h> - -#define CFS_LIST_HEAD_INIT(n) LIST_HEAD_INIT(n) -#define 
CFS_LIST_HEAD(n) LIST_HEAD(n) -#define CFS_INIT_LIST_HEAD(p) INIT_LIST_HEAD(p) - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#define CFS_HLIST_HEAD_INIT HLIST_HEAD_INIT -#define CFS_HLIST_HEAD(n) HLIST_HEAD(n) -#define CFS_INIT_HLIST_HEAD(p) INIT_HLIST_HEAD(p) -#define CFS_INIT_HLIST_NODE(p) INIT_HLIST_NODE(p) -#endif - -#else /* !defined (__linux__) || !defined(__KERNEL__) */ - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -#ifndef __WINNT__ -#define prefetch(a) ((void)a) -#else -#define prefetch(a) ((void *)a) -#endif - -struct list_head { - struct list_head *next, *prev; -}; - -typedef struct list_head list_t; - -#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) } - -#define CFS_LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -#define CFS_INIT_LIST_HEAD(ptr) do { \ - (ptr)->next = (ptr); (ptr)->prev = (ptr); \ -} while (0) - -#ifndef __APPLE__ -#define LIST_HEAD(n) CFS_LIST_HEAD(n) -#endif - -#define LIST_HEAD_INIT(n) CFS_LIST_HEAD_INIT(n) -#define INIT_LIST_HEAD(p) CFS_INIT_LIST_HEAD(p) - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head * new, - struct list_head * prev, - struct list_head * next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. 
- */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is in an undefined state. - */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - CFS_INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - * - * This is not safe to use if @list is already on the same list as @head. - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - * - * This is not safe to use if @list is already on the same list as @head. 
- */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. - */ -static inline int list_empty(struct list_head *head) -{ - return head->next == head; -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - CFS_INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. 
- */ -#define list_for_each(pos, head) \ - for (pos = (head)->next, prefetch(pos->next); pos != (head); \ - pos = pos->next, prefetch(pos->next)) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop counter. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/* - * Double linked lists with a single pointer list head. - * Mostly useful for hash tables where the two pointer list head is - * too wasteful. - * You lose the ability to access the tail in O(1). - */ - -struct hlist_head { - struct hlist_node *first; -}; - -struct hlist_node { - struct hlist_node *next, **pprev; -}; - -/* - * "NULL" might not be defined at this point - */ -#ifdef NULL -#define NULL_P NULL -#else -#define NULL_P ((void *)0) -#endif - -#define CFS_HLIST_HEAD_INIT { .first = NULL_P } -#define CFS_HLIST_HEAD(name) struct hlist_head name = { .first = NULL_P } -#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P) -#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P) - -#define HLIST_HEAD_INIT CFS_HLIST_HEAD_INIT -#define HLIST_HEAD(n) CFS_HLIST_HEAD(n) -#define INIT_HLIST_HEAD(p) CFS_INIT_HLIST_HEAD(p) -#define INIT_HLIST_NODE(p) CFS_INIT_HLIST_NODE(p) - -static inline int hlist_unhashed(const struct hlist_node *h) -{ - return !h->pprev; -} - -static inline int hlist_empty(const struct hlist_head *h) -{ - return !h->first; -} - -static inline void __hlist_del(struct hlist_node *n) -{ - struct hlist_node *next = n->next; - struct hlist_node **pprev = n->pprev; - *pprev = next; - if (next) - next->pprev = pprev; -} - -static inline void hlist_del(struct hlist_node *n) -{ - __hlist_del(n); -} - -static inline void hlist_del_init(struct hlist_node *n) -{ - if (n->pprev) { - __hlist_del(n); - 
INIT_HLIST_NODE(n); - } -} - -static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - if (first) - first->pprev = &n->next; - h->first = n; - n->pprev = &h->first; -} - -/* next must be != NULL */ -static inline void hlist_add_before(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - next->pprev = &n->next; - *(n->pprev) = n; -} - -static inline void hlist_add_after(struct hlist_node *n, - struct hlist_node *next) -{ - next->next = n->next; - n->next = next; - next->pprev = &n->next; - - if(next->next) - next->next->pprev = &next->next; -} - -#define hlist_entry(ptr, type, member) container_of(ptr,type,member) - -#define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ - pos = pos->next) - -#define hlist_for_each_safe(pos, n, head) \ - for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ - pos = n) - -/** - * hlist_for_each_entry - iterate over list of given type - * @tpos: the type * to use as a loop counter. - * @pos: the &struct hlist_node to use as a loop counter. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point - * @tpos: the type * to use as a loop counter. - * @pos: the &struct hlist_node to use as a loop counter. - * @member: the name of the hlist_node within the struct. 
- */ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_from - iterate over a hlist continuing from existing point - * @tpos: the type * to use as a loop counter. - * @pos: the &struct hlist_node to use as a loop counter. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop counter. - * @pos: the &struct hlist_node to use as a loop counter. - * @n: another &struct hlist_node to use as temporary storage - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) - -#endif /* __linux__ && __KERNEL__ */ - -#ifndef list_for_each_prev -/** - * list_for_each_prev - iterate over a list in reverse order - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ - pos = pos->prev, prefetch(pos->prev)) - -#endif /* list_for_each_prev */ - -#ifndef list_for_each_entry -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop counter. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. 
- */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - prefetch(pos->member.next); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member), \ - prefetch(pos->member.next)) -#endif /* list_for_each_entry */ - -#ifndef list_for_each_entry_reverse -/** - * list_for_each_entry_reverse - iterate backwards over list of given type. - * @pos: the type * to use as a loop counter. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ - pos = list_entry(pos->member.prev, typeof(*pos), member)) -#endif /* list_for_each_entry_reverse */ - -#ifndef list_for_each_entry_safe -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop counter. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. 
- */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) -#endif /* list_for_each_entry_safe */ - -#endif /* __LIBCFS_LUSTRE_LIST_H__ */ diff --git a/lnet/include/libcfs/lltrace.h b/lnet/include/libcfs/lltrace.h deleted file mode 100644 index dbeae911d2e24b26326a422797eaa3f7d48882ea..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/lltrace.h +++ /dev/null @@ -1,167 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Compile with: - * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl - */ -#ifndef __LIBCFS_LLTRACE_H__ -#define __LIBCFS_LLTRACE_H__ - -#if defined(__linux__) -#include <libcfs/linux/lltrace.h> -#elif defined(__APPLE__) -#include <libcfs/darwin/lltrace.h> -#elif defined(__WINNT__) -#include <libcfs/winnt/lltrace.h> -#else -#error Unsupported Operating System -#endif - -static inline int ltrace_write_file(char* fname) -{ - char* argv[3]; - - argv[0] = "debug_kernel"; - argv[1] = fname; - argv[2] = "1"; - - fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]); - - return jt_dbg_debug_kernel(3, argv); -} - -static inline int ltrace_clear() -{ - char* argv[1]; - - argv[0] = "clear"; - - fprintf(stderr, "[ptlctl] %s\n", argv[0]); - - return jt_dbg_clear_debug_buf(1, argv); -} - -static inline int ltrace_mark(int indent_level, char* text) -{ - char* argv[2]; - char mark_buf[PATH_MAX]; - - snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text); - - argv[0] = "mark"; - argv[1] = mark_buf; - return jt_dbg_mark_debug_buf(2, argv); -} - -static inline int ltrace_applymasks() -{ - char* argv[2]; - argv[0] = "list"; - argv[1] = "applymasks"; - - fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]); - - return 
jt_dbg_list(2, argv); -} - - -static inline int ltrace_filter(char* subsys_or_mask) -{ - char* argv[2]; - argv[0] = "filter"; - argv[1] = subsys_or_mask; - return jt_dbg_filter(2, argv); -} - -static inline int ltrace_show(char* subsys_or_mask) -{ - char* argv[2]; - argv[0] = "show"; - argv[1] = subsys_or_mask; - return jt_dbg_show(2, argv); -} - -static inline int ltrace_start() -{ - int rc = 0; - dbg_initialize(0, NULL); -#ifdef LNET_DEV_ID - rc = register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH, - LNET_DEV_MAJOR, LNET_DEV_MINOR); -#endif - ltrace_filter("class"); - ltrace_filter("nal"); - ltrace_filter("portals"); - - ltrace_show("all_types"); - ltrace_filter("trace"); - ltrace_filter("malloc"); - ltrace_filter("net"); - ltrace_filter("page"); - ltrace_filter("other"); - ltrace_filter("info"); - ltrace_applymasks(); - - return rc; -} - - -static inline void ltrace_stop() -{ -#ifdef LNET_DEV_ID - unregister_ioc_dev(LNET_DEV_ID); -#endif -} - -static inline int not_uml() -{ - /* Return Values: - * 0 when run under UML - * 1 when run on host - * <0 when lookup failed - */ - struct stat buf; - int rc = stat("/dev/ubd", &buf); - rc = ((rc<0) && (errno == ENOENT)) ? 
1 : rc; - if (rc<0) { - fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno)); - rc = 1; /* Assume host */ - } - return rc; -} - -#define LTRACE_MAX_NOB 256 -static inline void ltrace_add_processnames(char* fname) -{ - char cmdbuf[LTRACE_MAX_NOB]; - struct timeval tv; - struct timezone tz; - int nob; - int underuml = !not_uml(); - - gettimeofday(&tv, &tz); - - nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \""); - - /* Careful - these format strings need to match the CDEBUG - * formats in portals/linux/debug.c EXACTLY - */ - nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ", - S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec); - - if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) { - nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, - "(%s:%d:%s() %d | %d+%lu): ", - "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L); - } - else { - nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, - "(%s:%d:%s() %d+%lu): ", - "lltrace.h", __LINE__, __FUNCTION__, 0, 0L); - } - - nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname); - system(cmdbuf); -} - -#endif diff --git a/lnet/include/libcfs/portals_utils.h b/lnet/include/libcfs/portals_utils.h deleted file mode 100644 index b79eb7eb00efa0d891b951f270bec92ab70333e4..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/portals_utils.h +++ /dev/null @@ -1,21 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LIBCFS_PORTALS_UTILS_H__ -#define __LIBCFS_PORTALS_UTILS_H__ - -/* - * portals_utils.h - * - */ -#if defined(__linux__) -#include <libcfs/linux/portals_utils.h> -#elif defined(__APPLE__) -#include <libcfs/darwin/portals_utils.h> -#elif defined(__WINNT__) -#include <libcfs/winnt/portals_utils.h> -#else -#error Unsupported Operating System -#endif - -#endif diff --git a/lnet/include/libcfs/types.h b/lnet/include/libcfs/types.h deleted file mode 100755 index 
71dd7fb1e1ca3e135e9762c3160812687f8b062a..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/types.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _LIBCFS_TYPES_H -#define _LIBCFS_TYPES_H - -/* - * This file was inttroduced to resolve XT3 (Catamount) build issues. - * The orignal idea was to move <lustre/types.h> here however at - * the time of this writing - * it's unclear what external dependencies are tied - * to that file (It's not just some source file #including it) - * there is some build/packaging infrastructure that includes it. - * Hopefully that will be resolved shortly, that file will - * be removed, its contents copied here and this comment can be deleted. - */ - -#include <lustre/types.h> - -#endif diff --git a/lnet/include/libcfs/user-lock.h b/lnet/include/libcfs/user-lock.h deleted file mode 100644 index cea7a6d6faa83d9c6c958d9ea61c80e4cd0bdf13..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/user-lock.h +++ /dev/null @@ -1,207 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. 
- * - */ - -#ifndef __LIBCFS_USER_LOCK_H__ -#define __LIBCFS_USER_LOCK_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Implementations of portable synchronization APIs for liblustre */ - -/* - * liblustre is single-threaded, so most "synchronization" APIs are trivial. - * - * XXX Liang: There are several branches share lnet with b_hd_newconfig, - * if we define lock APIs at here, there will be conflict with liblustre - * in other branches. - */ - -#ifndef __KERNEL__ -#include <stdio.h> -#include <stdlib.h> - -#if 0 -/* - * Optional debugging (magic stamping and checking ownership) can be added. - */ - -/* - * spin_lock - * - * - spin_lock_init(x) - * - spin_lock(x) - * - spin_unlock(x) - * - spin_trylock(x) - * - * - spin_lock_irqsave(x, f) - * - spin_unlock_irqrestore(x, f) - * - * No-op implementation. - */ -struct spin_lock {int foo;}; - -typedef struct spin_lock spinlock_t; - -#define SPIN_LOCK_UNLOCKED (spinlock_t) { } -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) - -void spin_lock_init(spinlock_t *lock); -void spin_lock(spinlock_t *lock); -void spin_unlock(spinlock_t *lock); -int spin_trylock(spinlock_t *lock); -void spin_lock_bh_init(spinlock_t *lock); -void spin_lock_bh(spinlock_t *lock); -void spin_unlock_bh(spinlock_t *lock); -static inline int spin_is_locked(spinlock_t *l) {return 1;} - -static inline void spin_lock_irqsave(spinlock_t *l, unsigned long f){} -static inline void spin_unlock_irqrestore(spinlock_t *l, unsigned long f){} - -/* - * Semaphore - * - * - sema_init(x, v) - * - __down(x) - * - __up(x) - */ -typedef struct semaphore { - int foo; -} mutex_t; - -void sema_init(struct semaphore *s, int val); -void __down(struct semaphore *s); -void __up(struct semaphore *s); - -/* - * Mutex: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ -#define mutex_up(s) __up(s) -#define mutex_down(s) __down(s) - -#define init_mutex(x) 
sema_init(x, 1) -#define init_mutex_locked(x) sema_init(x, 0) - -/* - * Completion: - * - * - init_completion(c) - * - complete(c) - * - wait_for_completion(c) - */ -#if 0 -struct completion {}; - -void init_completion(struct completion *c); -void complete(struct completion *c); -void wait_for_completion(struct completion *c); -#endif - -/* - * rw_semaphore: - * - * - init_rwsem(x) - * - down_read(x) - * - up_read(x) - * - down_write(x) - * - up_write(x) - */ -struct rw_semaphore {}; - -void init_rwsem(struct rw_semaphore *s); -void down_read(struct rw_semaphore *s); -int down_read_trylock(struct rw_semaphore *s); -void down_write(struct rw_semaphore *s); -int down_write_trylock(struct rw_semaphore *s); -void up_read(struct rw_semaphore *s); -void up_write(struct rw_semaphore *s); - -/* - * read-write lock : Need to be investigated more!! - * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore - * - * - DECLARE_RWLOCK(l) - * - rwlock_init(x) - * - read_lock(x) - * - read_unlock(x) - * - write_lock(x) - * - write_unlock(x) - */ -typedef struct rw_semaphore rwlock_t; - -#define rwlock_init(pl) init_rwsem(pl) - -#define read_lock(l) down_read(l) -#define read_unlock(l) up_read(l) -#define write_lock(l) down_write(l) -#define write_unlock(l) up_write(l) - -static inline void -write_lock_irqsave(rwlock_t *l, unsigned long f) { write_lock(l); } -static inline void -write_unlock_irqrestore(rwlock_t *l, unsigned long f) { write_unlock(l); } - -static inline void -read_lock_irqsave(rwlock_t *l, unsigned long f) { read_lock(l); } -static inline void -read_unlock_irqrestore(rwlock_t *l, unsigned long f) { read_unlock(l); } - -/* - * Atomic for user-space - * Copied from liblustre - */ -typedef struct { volatile int counter; } atomic_t; - -#define ATOMIC_INIT(i) { (i) } -#define atomic_read(a) ((a)->counter) -#define atomic_set(a,b) do {(a)->counter = b; } while (0) -#define atomic_dec_and_test(a) ((--((a)->counter)) == 0) -#define atomic_inc(a) 
(((a)->counter)++) -#define atomic_dec(a) do { (a)->counter--; } while (0) -#define atomic_add(b,a) do {(a)->counter += b;} while (0) -#define atomic_sub(b,a) do {(a)->counter -= b;} while (0) - -#endif - -/* !__KERNEL__ */ -#endif - -/* __LIBCFS_USER_LOCK_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/user-prim.h b/lnet/include/libcfs/user-prim.h deleted file mode 100644 index 54f783217c61b625407982f8c231a1c27c0bd776..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/user-prim.h +++ /dev/null @@ -1,305 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. - * - */ - -#ifndef __LIBCFS_USER_PRIM_H__ -#define __LIBCFS_USER_PRIM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Implementations of portable APIs for liblustre */ - -/* - * liblustre is single-threaded, so most "synchronization" APIs are trivial. 
- */ - -#ifndef __KERNEL__ - -#include <stdlib.h> -#include <string.h> -#include <sys/signal.h> -#include <sys/mman.h> -#include <libcfs/list.h> -#include <libcfs/user-time.h> -#include <signal.h> -#include <stdlib.h> - -/* - * Wait Queue. No-op implementation. - */ - -typedef struct cfs_waitlink { - struct list_head sleeping; - void *process; -} cfs_waitlink_t; - -typedef struct cfs_waitq { - struct list_head sleepers; -} cfs_waitq_t; - -void cfs_waitq_init(struct cfs_waitq *waitq); -void cfs_waitlink_init(struct cfs_waitlink *link); -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link); -void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); -int cfs_waitq_active(struct cfs_waitq *waitq); -void cfs_waitq_signal(struct cfs_waitq *waitq); -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); -void cfs_waitq_broadcast(struct cfs_waitq *waitq, int state); -void cfs_waitq_wait(struct cfs_waitlink *link); -int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout); -#define cfs_schedule_timeout(s, t) \ - do { \ - cfs_waitlink_t l; \ - cfs_waitq_timedwait(&l, s, t); \ - } while (0) - -#define CFS_TASK_INTERRUPTIBLE (0) -#define CFS_TASK_UNINT (0) - -/* 2.4 defines */ - -/* XXX - * for this moment, liblusre will not rely OST for non-page-aligned write - */ -#define LIBLUSTRE_HANDLE_UNALIGNED_PAGE - -struct page { - void *addr; - unsigned long index; - struct list_head list; - unsigned long private; - - /* internally used by liblustre file i/o */ - int _offset; - int _count; -#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE - int _managed; -#endif -}; - -typedef struct page cfs_page_t; - -#define CFS_PAGE_SIZE PAGE_SIZE -#define CFS_PAGE_SHIFT PAGE_SHIFT -#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) - -cfs_page_t *cfs_alloc_page(unsigned int flags); 
-void cfs_free_page(cfs_page_t *pg); -void *cfs_page_address(cfs_page_t *pg); -void *cfs_kmap(cfs_page_t *pg); -void cfs_kunmap(cfs_page_t *pg); - -#define cfs_get_page(p) __I_should_not_be_called__(at_all) -#define cfs_page_count(p) __I_should_not_be_called__(at_all) -#define cfs_page_index(p) ((p)->index) - -/* - * Memory allocator - * Inline function, so utils can use them without linking of libcfs - */ -#define __ALLOC_ZERO (1 << 2) -static inline void *cfs_alloc(size_t nr_bytes, u_int32_t flags) -{ - void *result; - - result = malloc(nr_bytes); - if (result != NULL && (flags & __ALLOC_ZERO)) - memset(result, 0, nr_bytes); - return result; -} - -#define cfs_free(addr) free(addr) -#define cfs_alloc_large(nr_bytes) cfs_alloc(nr_bytes, 0) -#define cfs_free_large(addr) cfs_free(addr) - -#define CFS_ALLOC_ATOMIC_TRY (0) -/* - * SLAB allocator - */ -typedef struct { - int size; -} cfs_mem_cache_t; - -#define SLAB_HWCACHE_ALIGN 0 -#define SLAB_KERNEL 0 -#define SLAB_NOFS 0 - -cfs_mem_cache_t * -cfs_mem_cache_create(const char *, size_t, size_t, unsigned long); -int cfs_mem_cache_destroy(cfs_mem_cache_t *c); -void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp); -void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr); - -typedef int (cfs_read_proc_t)(char *page, char **start, off_t off, - int count, int *eof, void *data); - -struct file; /* forward ref */ -typedef int (cfs_write_proc_t)(struct file *file, const char *buffer, - unsigned long count, void *data); - -/* - * Signal - */ -typedef sigset_t cfs_sigset_t; - -/* - * Timer - */ -#include <sys/time.h> - -typedef struct { - struct list_head tl_list; - void (*function)(unsigned long unused); - unsigned long data; - long expires; -} cfs_timer_t; - -#define cfs_init_timer(t) do {} while(0) -#define cfs_jiffies \ -({ \ - unsigned long _ret = 0; \ - struct timeval tv; \ - if (gettimeofday(&tv, NULL) == 0) \ - _ret = tv.tv_sec; \ - _ret; \ -}) - -static inline int cfs_timer_init(cfs_timer_t *l, void (* func)(unsigned 
long), void *arg) -{ - CFS_INIT_LIST_HEAD(&l->tl_list); - l->function = func; - l->data = (unsigned long)arg; - return 0; -} - -static inline int cfs_timer_is_armed(cfs_timer_t *l) -{ - if (cfs_time_before(cfs_jiffies, l->expires)) - return 1; - else - return 0; -} - -static inline void cfs_timer_arm(cfs_timer_t *l, int thetime) -{ - l->expires = thetime; -} - -static inline void cfs_timer_disarm(cfs_timer_t *l) -{ -} - -static inline long cfs_timer_deadline(cfs_timer_t *l) -{ - return l->expires; -} - -#if 0 -#define cfs_init_timer(t) do {} while(0) -void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg); -void cfs_timer_done(struct cfs_timer *t); -void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline); -void cfs_timer_disarm(struct cfs_timer *t); -int cfs_timer_is_armed(struct cfs_timer *t); - -cfs_time_t cfs_timer_deadline(struct cfs_timer *t); -#endif - -#define in_interrupt() (0) - -static inline void cfs_pause(cfs_duration_t d) -{ - struct timespec s; - - cfs_duration_nsec(d, &s); - nanosleep(&s, NULL); -} - -typedef void cfs_psdev_t; - -static inline int cfs_psdev_register(cfs_psdev_t *foo) -{ - return 0; -} - -static inline int cfs_psdev_deregister(cfs_psdev_t *foo) -{ - return 0; -} - -/* - * portable UNIX device file identification. 
- */ - -typedef unsigned int cfs_rdev_t; -// typedef unsigned long long kdev_t; -/* - */ -#define cfs_lock_kernel() do {} while (0) -#define cfs_sigfillset(l) do {} while (0) -#define cfs_recalc_sigpending(l) do {} while (0) -#define cfs_kernel_thread(l,m,n) LBUG() - -// static inline void local_irq_save(unsigned long flag) {return;} -// static inline void local_irq_restore(unsigned long flag) {return;} - -enum { - CFS_STACK_TRACE_DEPTH = 16 -}; - -struct cfs_stack_trace { - void *frame[CFS_STACK_TRACE_DEPTH]; -}; - -/* - * arithmetic - */ -#define do_div(a,b) \ - ({ \ - unsigned long remainder;\ - remainder = (a) % (b); \ - (a) = (a) / (b); \ - (remainder); \ - }) - - -/* !__KERNEL__ */ -#endif - -/* __LIBCFS_USER_PRIM_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/user-time.h b/lnet/include/libcfs/user-time.h deleted file mode 100644 index 86cbc2ded2f4861939e899df409e6de7756a3c04..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/user-time.h +++ /dev/null @@ -1,201 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. 
- * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. - * - */ - -#ifndef __LIBCFS_USER_TIME_H__ -#define __LIBCFS_USER_TIME_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* Portable time API */ - -/* - * Platform provides three opaque data-types: - * - * cfs_time_t represents point in time. This is internal kernel - * time rather than "wall clock". This time bears no - * relation to gettimeofday(). - * - * cfs_duration_t represents time interval with resolution of internal - * platform clock - * - * cfs_fs_time_t represents instance in world-visible time. This is - * used in file-system time-stamps - * - * cfs_time_t cfs_time_current(void); - * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); - * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); - * int cfs_time_before (cfs_time_t, cfs_time_t); - * int cfs_time_beforeq(cfs_time_t, cfs_time_t); - * - * cfs_duration_t cfs_duration_build(int64_t); - * - * time_t cfs_duration_sec (cfs_duration_t); - * void cfs_duration_usec(cfs_duration_t, struct timeval *); - * void cfs_duration_nsec(cfs_duration_t, struct timespec *); - * - * void cfs_fs_time_current(cfs_fs_time_t *); - * time_t cfs_fs_time_sec (cfs_fs_time_t *); - * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); - * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); - * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); - * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); - * - * CFS_TIME_FORMAT - * CFS_DURATION_FORMAT - * - */ - -#ifndef __KERNEL__ - -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION 1000000 - -/* - * Liblustre. time(2) based implementation. 
- */ - -#include <sys/types.h> -#include <sys/time.h> -#include <time.h> - -typedef time_t cfs_fs_time_t; -typedef time_t cfs_time_t; -typedef long cfs_duration_t; - -static inline cfs_time_t cfs_time_current(void) -{ - return time(NULL); -} - -static inline cfs_duration_t cfs_time_seconds(int seconds) -{ - return seconds; -} - -static inline time_t cfs_time_current_sec(void) -{ - return cfs_time_seconds(cfs_time_current()); -} - -static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return t1 < t2; -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return t1 <= t2; -} - -static inline cfs_duration_t cfs_duration_build(int64_t nano) -{ - return (cfs_duration_t) (nano / ONE_BILLION); -} - -static inline time_t cfs_duration_sec(cfs_duration_t d) -{ - return d; -} - -static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) -{ - s->tv_sec = d; - s->tv_usec = 0; -} - -static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) -{ - s->tv_sec = d; - s->tv_nsec = 0; -} - -static inline void cfs_fs_time_current(cfs_fs_time_t *t) -{ - time(t); -} - -static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) -{ - return *t; -} - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - v->tv_sec = *t; - v->tv_usec = 0; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - s->tv_sec = *t; - s->tv_nsec = 0; -} - -static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return *t1 < *t2; -} - -static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return *t1 <= *t2; -} - -#define CFS_TICK (1) - -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return t + d; -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return t1 - t2; -} - -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift 
-#define cfs_time_before_64 cfs_time_before - -#define CFS_TIME_T "%lu" -#define CFS_DURATION_T "%ld" - -/* !__KERNEL__ */ -#endif - -/* __LIBCFS_USER_TIME_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/winnt/kp30.h b/lnet/include/libcfs/winnt/kp30.h deleted file mode 100644 index e494a9fde5621253e220e7b01bea6c11c7a50fc7..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/kp30.h +++ /dev/null @@ -1,156 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __LIBCFS_WINNT_KP30_H__ -#define __LIBCFS_WINNT_KP30_H__ - -#ifndef __LIBCFS_KP30_H__ -#error Do not #include this file directly. 
#include <libcfs/kp30.h> instead -#endif - -#include <libcfs/winnt/portals_compat25.h> -#include <lnet/types.h> - -#ifdef __KERNEL__ - -/* Module parameter support */ -#define CFS_MODULE_PARM(name, t, type, perm, desc) - -#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */ - - -static inline void our_cond_resched() -{ - schedule_timeout(1i64); -} - -#ifdef CONFIG_SMP -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */ -#else -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) -#endif - -#error Need a winnt version of panic() -#define LIBCFS_PANIC(msg) KeBugCheckEx(msg, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL) -#error libcfs_register_panic_notifier() missing -#error libcfs_unregister_panic_notifier() missing - -#define cfs_work_struct_t WORK_QUEUE_ITEM -#define cfs_prepare_work(tq, routine, contex) -#define cfs_schedule_work(tq) - -/* ------------------------------------------------------------------- */ - -#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) (cfs_symbol_get(#x)) -#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x) - -#define PORTAL_MODULE_USE do{}while(0) -#define PORTAL_MODULE_UNUSE do{}while(0) - -#define printk DbgPrint -#define ptintf DbgPrint - -#else /* !__KERNEL__ */ - -# include <stdio.h> -# include <stdlib.h> -#ifdef __CYGWIN__ -# include <cygwin-ioctl.h> -#endif -# include <time.h> - -#endif /* End of !__KERNEL__ */ - -/******************************************************************************/ -/* Light-weight trace - * Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 0 - -/* kernel hasn't defined this? 
*/ -typedef struct { - __s64 lwte_when; - char *lwte_where; - void *lwte_task; - long_ptr lwte_p1; - long_ptr lwte_p2; - long_ptr lwte_p3; - long_ptr lwte_p4; -# if BITS_PER_LONG > 32 - long_ptr lwte_pad; -# endif -} lwt_event_t; - - -# define LWT_EVENT(p1,p2,p3,p4) - - -/* ------------------------------------------------------------------ */ - -#define IOCTL_LIBCFS_TYPE long_ptr - -#ifdef __CYGWIN__ -# ifndef BITS_PER_LONG -# if (~0UL) == 0xffffffffUL -# define BITS_PER_LONG 32 -# else -# define BITS_PER_LONG 64 -# endif -# endif -#endif - -#if BITS_PER_LONG > 32 -# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) -# define LL_POISON ((long_ptr)0x5a5a5a5a5a5a5a5a) -# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a5a5a5a5a) -#else -# define LI_POISON ((int)0x5a5a5a5a) -# define LL_POISON ((long_ptr)0x5a5a5a5a) -# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a) -#endif - -#if defined(__x86_64__) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%u" -# define LPSSZ "%d" -#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%u" -# define LPSSZ "%d" -#endif -#ifndef LPU64 -# error "No word size defined" -#endif - -#endif diff --git a/lnet/include/libcfs/winnt/libcfs.h b/lnet/include/libcfs/winnt/libcfs.h deleted file mode 100644 index 386eb5f9e0052e139475d3c54047b8ba0bbced53..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/libcfs.h +++ /dev/null @@ -1,126 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __LIBCFS_WINNT_LIBCFS_H__ -#define __LIBCFS_WINNT_LIBCFS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -/* workgroud for VC compiler */ -#ifndef __FUNCTION__ -#define __FUNCTION__ "generic" -#endif - -#include <libcfs/winnt/winnt-types.h> -#include <libcfs/portals_utils.h> -#include <libcfs/winnt/winnt-time.h> -#include <libcfs/winnt/winnt-lock.h> -#include <libcfs/winnt/winnt-mem.h> -#include <libcfs/winnt/winnt-prim.h> -#include <libcfs/winnt/winnt-fs.h> -#include <libcfs/winnt/winnt-tcpip.h> - -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u32 ph_cpu_id; - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); - -#ifdef __KERNEL__ - -enum { - /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */ - CFS_STACK_TRACE_DEPTH = 16 -}; - -struct cfs_stack_trace { - void *frame[CFS_STACK_TRACE_DEPTH]; -}; - -static inline __u32 query_stack_size() -{ - ULONG LowLimit, HighLimit; - - IoGetStackLimits(&LowLimit, &HighLimit); - ASSERT(HighLimit > LowLimit); - - return (__u32) (HighLimit - LowLimit); -} -#else -static inline __u32 query_stack_size() -{ - return 4096; -} -#endif - - -#ifndef THREAD_SIZE -# define 
THREAD_SIZE query_stack_size() -#endif - -#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) - -#ifdef __KERNEL__ -# ifdef __ia64__ -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((ulong_ptr)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -# else -# define CDEBUG_STACK (IoGetRemainingStackSize()) -# error "This doesn't seem right; CDEBUG_STACK should grow with the stack" -# endif /* __ia64__ */ - -#define CHECK_STACK() \ -do { \ - unsigned long _stack = CDEBUG_STACK(); \ - \ - if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \ - libcfs_stack = _stack; \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \ - __FILE__, NULL, __LINE__, \ - "maximum lustre stack %lu\n", _stack); \ - } \ -} while (0) -#else /* !__KERNEL__ */ -#define CHECK_STACK() do { } while(0) -#define CDEBUG_STACK() (0L) -#endif /* __KERNEL__ */ - -/* initial pid */ -#define LUSTRE_LNET_PID 12345 - -#define ENTRY_NESTING_SUPPORT (0) -#define ENTRY_NESTING do {;} while (0) -#define EXIT_NESTING do {;} while (0) -#define __current_nesting_level() (0) - -#endif /* _WINNT_LIBCFS_H */ diff --git a/lnet/include/libcfs/winnt/lltrace.h b/lnet/include/libcfs/winnt/lltrace.h deleted file mode 100644 index 9615e94e7c750d26233923a9d2b68c0ce81ee195..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/lltrace.h +++ /dev/null @@ -1,33 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_WINNT_LLTRACE_H__ -#define __LIBCFS_WINNT_LLTRACE_H__ - -#ifndef __LIBCFS_LLTRACE_H__ -#error Do not #include this file directly. #include <libcfs/lltrace.h> instead -#endif - - -#endif diff --git a/lnet/include/libcfs/winnt/portals_compat25.h b/lnet/include/libcfs/winnt/portals_compat25.h deleted file mode 100644 index 579b795c6550c3fcb837392b343411a156c58459..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/portals_compat25.h +++ /dev/null @@ -1,28 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef __LIBCFS_WINNT_PORTALS_COMPAT_H__ -#define __LIBCFS_WINNT_PORTALS_COMPAT_H__ - - - -#endif /* _PORTALS_COMPAT_H */ diff --git a/lnet/include/libcfs/winnt/portals_utils.h b/lnet/include/libcfs/winnt/portals_utils.h deleted file mode 100644 index ec806925cdef007a29f40fa2b09b86d99fe417c2..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/portals_utils.h +++ /dev/null @@ -1,168 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_WINNT_PORTALS_UTILS_H__ -#define __LIBCFS_WINNT_PORTALS_UTILS_H__ - -#ifndef __LIBCFS_PORTALS_UTILS_H__ -#error Do not #include this file directly. 
#include <libcfs/portals_utils.h> instead -#endif - -#ifndef cfs_is_flag_set -#define cfs_is_flag_set(x,f) (((x)&(f))==(f)) -#endif - -#ifndef cfs_set_flag -#define cfs_set_flag(x,f) ((x) |= (f)) -#endif - -#ifndef cfs_clear_flag -#define cfs_clear_flag(x,f) ((x) &= ~(f)) -#endif - - -static inline __u32 __do_div(__u32 * n, __u32 b) -{ - __u32 mod; - - mod = *n % b; - *n = *n / b; - return mod; -} - -#define do_div(n,base) __do_div((__u32 *)&(n), (__u32) (base)) - -#ifdef __KERNEL__ - -#include <stdlib.h> -#include <libcfs/winnt/winnt-types.h> - -char * strsep(char **s, const char *ct); -static inline size_t strnlen(const char * s, size_t count) { - size_t len = 0; - while(len < count && s[len++]); - return len; -} -char * ul2dstr(ulong_ptr address, char *buf, int len); - -#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3) -#define simple_strtoll(a1, a2, a3) (__s64)strtoull(a1, a2, a3) -#define simple_strtoull(a1, a2, a3) strtoull(a1, a2, a3) - -unsigned long simple_strtoul(const char *cp,char **endp, unsigned int base); - -static inline int test_bit(int nr, void * addr) -{ - return ((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0; -} - -static inline void clear_bit(int nr, void * addr) -{ - (((volatile ULONG *) addr)[nr >> 5]) &= (~(1UL << (nr & 31))); -} - - -static inline void set_bit(int nr, void * addr) -{ - (((volatile ULONG *) addr)[nr >> 5]) |= (1UL << (nr & 31)); -} - -static inline void read_random(char *buf, int len) -{ - ULONG Seed = (ULONG) buf; - Seed = RtlRandom(&Seed); - while (len >0) { - if (len > sizeof(ULONG)) { - memcpy(buf, &Seed, sizeof(ULONG)); - len -= sizeof(ULONG); - buf += sizeof(ULONG); - } else { - memcpy(buf, &Seed, len); - len = 0; - break; - } - } -} -#define get_random_bytes(buf, len) read_random(buf, len) - -/* do NOT use function or expression as parameters ... */ - -#ifndef min_t -#define min_t(type,x,y) (type)(x) < (type)(y) ? (x): (y) -#endif - -#ifndef max_t -#define max_t(type,x,y) (type)(x) < (type)(y) ? 
(y): (x) -#endif - - -#define NIPQUAD(addr) \ - ((unsigned char *)&addr)[0], \ - ((unsigned char *)&addr)[1], \ - ((unsigned char *)&addr)[2], \ - ((unsigned char *)&addr)[3] - -#define HIPQUAD(addr) \ - ((unsigned char *)&addr)[3], \ - ((unsigned char *)&addr)[2], \ - ((unsigned char *)&addr)[1], \ - ((unsigned char *)&addr)[0] - -static int copy_from_user(void *to, void *from, int c) -{ - memcpy(to, from, c); - return 0; -} - -static int copy_to_user(void *to, void *from, int c) -{ - memcpy(to, from, c); - return 0; -} - - -#define put_user(x, ptr) \ -( \ - *(ptr) = x, \ - 0 \ -) - - -#define get_user(x,ptr) \ -( \ - x = *(ptr), \ - 0 \ -) - -#define num_physpages (64 * 1024) - -#define snprintf _snprintf -#define vsnprintf _vsnprintf - - -#endif /* !__KERNEL__ */ - -int cfs_error_code(NTSTATUS); - -#endif diff --git a/lnet/include/libcfs/winnt/winnt-fs.h b/lnet/include/libcfs/winnt/winnt-fs.h deleted file mode 100644 index 6280b93e4b350332516e3384f5d0f95c6e90e20c..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-fs.h +++ /dev/null @@ -1,280 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * File operations & routines. - * - */ - -#ifndef __LIBCFS_WINNT_CFS_FS_H__ -#define __LIBCFS_WINNT_CFS_FS_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - - -/* - * Platform defines - * - * cfs_rdev_t - */ - -typedef unsigned short cfs_rdev_t; - -typedef unsigned int cfs_major_nr_t; -typedef unsigned int cfs_minor_nr_t; - - -#define MINORBITS 8 -#define MINORMASK ((1U << MINORBITS) - 1) - -#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) -#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) -#define NODEV 0 -#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) - - -static inline cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor) -{ - return MKDEV(major, minor); -} - -static inline cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev) -{ - return MAJOR(rdev); -} - -static inline cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev) -{ - return MINOR(rdev); -} - - -#ifdef __KERNEL__ - -struct file_operations -{ - loff_t (*lseek)(struct file * file, loff_t offset, int origin); - ssize_t (*read) (struct file * file, char * buf, size_t nbytes, loff_t *ppos); - ssize_t (*write)(struct file * file, const char * buffer, - size_t count, loff_t *ppos); - int (*ioctl) (struct file *, unsigned int, ulong_ptr); - int (*open) (struct file *); - int (*release) (struct file *); -}; - -struct file { - - cfs_handle_t f_handle; - unsigned int f_flags; - mode_t f_mode; - ulong_ptr f_count; - - //struct list_head f_list; - //struct dentry * f_dentry; - - cfs_proc_entry_t * proc_dentry; - cfs_file_operations_t * f_op; - - size_t f_size; - loff_t f_pos; - unsigned int f_uid, f_gid; - int f_error; - - ulong_ptr f_version; - - void * private_data; - - char f_name[1]; - -}; - -#define cfs_filp_size(f) ((f)->f_size) -#define cfs_filp_poff(f) (&(f)->f_pos) - -cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err); -int cfs_filp_close(cfs_file_t *fp); -int cfs_filp_read(cfs_file_t 
*fp, void *buf, size_t nbytes, loff_t *pos); -int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos); -int cfs_filp_fsync(cfs_file_t *fp); -int cfs_get_file(cfs_file_t *fp); -int cfs_put_file(cfs_file_t *fp); -int cfs_file_count(cfs_file_t *fp); - - - -/* - * CFS_FLOCK routines - */ - -typedef struct file_lock{ - int fl_type; - pid_t fl_pid; - size_t fl_len; - off_t fl_start; - off_t fl_end; -} cfs_flock_t; - -#define CFS_INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) -#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t) - -#define cfs_flock_type(fl) ((fl)->fl_type) -#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0) -#define cfs_flock_pid(fl) ((fl)->fl_pid) -#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0) -#define cfs_flock_start(fl) ((fl)->fl_start) -#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0) -#define cfs_flock_end(fl) ((fl)->fl_end) -#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0) - -#define ATTR_MODE 0x0001 -#define ATTR_UID 0x0002 -#define ATTR_GID 0x0004 -#define ATTR_SIZE 0x0008 -#define ATTR_ATIME 0x0010 -#define ATTR_MTIME 0x0020 -#define ATTR_CTIME 0x0040 -#define ATTR_ATIME_SET 0x0080 -#define ATTR_MTIME_SET 0x0100 -#define ATTR_FORCE 0x0200 /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG 0x0400 -#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ -#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ -//#define ATTR_CTIME_SET 0x2000 -#define ATTR_BLOCKS 0x4000 - -#define in_group_p(x) (0) - -/* - * proc fs routines - */ - -int proc_init_fs(); -void proc_destroy_fs(); - - -/* - * misc - */ - -static inline void *ERR_PTR(long_ptr error) -{ - return (void *) error; -} - -static inline long_ptr PTR_ERR(const void *ptr) -{ - return (long_ptr) ptr; -} - -static inline long_ptr IS_ERR(const void *ptr) -{ - return (ulong_ptr)ptr > (ulong_ptr)-1000L; -} - -#else /* !__KERNEL__ */ - -#define 
CREATE_NEW 1 -#define CREATE_ALWAYS 2 -#define OPEN_EXISTING 3 -#define OPEN_ALWAYS 4 -#define TRUNCATE_EXISTING 5 - -#define SECTION_QUERY 0x0001 -#define SECTION_MAP_WRITE 0x0002 -#define SECTION_MAP_READ 0x0004 -#define SECTION_MAP_EXECUTE 0x0008 -#define SECTION_EXTEND_SIZE 0x0010 - -#define FILE_MAP_COPY SECTION_QUERY -#define FILE_MAP_WRITE SECTION_MAP_WRITE -#define FILE_MAP_READ SECTION_MAP_READ -#define FILE_MAP_ALL_ACCESS SECTION_ALL_ACCESS - - -NTSYSAPI -HANDLE -NTAPI -CreateFileA( - IN LPCSTR lpFileName, - IN DWORD dwDesiredAccess, - IN DWORD dwShareMode, - IN PVOID lpSecurityAttributes, - IN DWORD dwCreationDisposition, - IN DWORD dwFlagsAndAttributes, - IN HANDLE hTemplateFile - ); - -#define CreateFile CreateFileA - -NTSYSAPI -BOOL -NTAPI -CloseHandle( - IN OUT HANDLE hObject - ); - -NTSYSAPI -HANDLE -NTAPI -CreateFileMappingA( - IN HANDLE hFile, - IN PVOID lpFileMappingAttributes, - IN DWORD flProtect, - IN DWORD dwMaximumSizeHigh, - IN DWORD dwMaximumSizeLow, - IN LPCSTR lpName - ); -#define CreateFileMapping CreateFileMappingA - -NTSYSAPI -DWORD -NTAPI -GetFileSize( - IN HANDLE hFile, - OUT DWORD * lpFileSizeHigh - ); - -NTSYSAPI -PVOID -NTAPI -MapViewOfFile( - IN HANDLE hFileMappingObject, - IN DWORD dwDesiredAccess, - IN DWORD dwFileOffsetHigh, - IN DWORD dwFileOffsetLow, - IN SIZE_T dwNumberOfBytesToMap - ); - -NTSYSAPI -BOOL -NTAPI -UnmapViewOfFile( - IN PVOID lpBaseAddress - ); - -#endif /* __KERNEL__ */ - -typedef struct { - void *d; -} cfs_dentry_t; - - -#endif /* __LIBCFS_WINNT_CFS_FS_H__*/ diff --git a/lnet/include/libcfs/winnt/winnt-lock.h b/lnet/include/libcfs/winnt/winnt-lock.h deleted file mode 100644 index e0b9393eaa40c1ad7ef7ba6c39ec1d459349fb90..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-lock.h +++ /dev/null @@ -1,686 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. 
<braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_WINNT_CFS_LOCK_H__ -#define __LIBCFS_WINNT_CFS_LOCK_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. #include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ - - -/* - * nt specific part ... 
- */ - - -/* atomic */ - -typedef struct { volatile int counter; } atomic_t; - -#define ATOMIC_INIT(i) { i } - -#define atomic_read(v) ((v)->counter) -#define atomic_set(v,i) (((v)->counter) = (i)) - -void FASTCALL atomic_add(int i, atomic_t *v); -void FASTCALL atomic_sub(int i, atomic_t *v); - -int FASTCALL atomic_sub_and_test(int i, atomic_t *v); - -void FASTCALL atomic_inc(atomic_t *v); -void FASTCALL atomic_dec(atomic_t *v); - -int FASTCALL atomic_dec_and_test(atomic_t *v); -int FASTCALL atomic_inc_and_test(atomic_t *v); - - -/* event */ - -typedef KEVENT event_t; - -/* - * cfs_init_event - * To initialize the event object - * - * Arguments: - * event: pointer to the event object - * type: Non Zero: SynchronizationEvent - * Zero: NotificationEvent - * status: the initial stats of the event - * Non Zero: signaled - * Zero: un-signaled - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ -static inline void - cfs_init_event(event_t *event, int type, int status) -{ - KeInitializeEvent( - event, - (type) ? SynchronizationEvent: NotificationEvent, - (status) ? TRUE : FALSE - ); -} - -/* - * cfs_wait_event - * To wait on an event to syncrhonize the process - * - * Arguments: - * event: pointer to the event object - * timeout: the timeout for waitting or 0 means infinite time. - * - * Return Value: - * Zero: waiting timeouts - * Non Zero: event signaled ... - * - * Notes: - * N/A - */ - -static inline int64_t -cfs_wait_event(event_t * event, int64_t timeout) -{ - NTSTATUS Status; - LARGE_INTEGER TimeOut; - - TimeOut.QuadPart = -1 * (10000000/HZ) * timeout; - - Status = KeWaitForSingleObject( - event, - Executive, - KernelMode, - FALSE, - (timeout != 0) ? 
(&TimeOut) : (NULL) - ); - - if (Status == STATUS_TIMEOUT) { - return 0; - } - - return TRUE; // signaled case -} - -/* - * cfs_wake_event - * To signal the event object - * - * Arguments: - * event: pointer to the event object - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline int -cfs_wake_event(event_t * event) -{ - return (KeSetEvent(event, 0, FALSE) != 0); -} - -/* - * cfs_clear_event - * To clear/reset the status of the event object - * - * Arguments: - * event: pointer to the event object - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void -cfs_clear_event(event_t * event) -{ - KeResetEvent(event); -} - - -/* - * IMPORTANT !!!!!!!! - * - * All locks' declaration are not guaranteed to be initialized, - * Althought some of they are initialized in Linux. All locks - * declared by CFS_DECL_* should be initialized explicitly. - */ - - -/* - * spin lock defintions / routines - */ - -/* - * Warning: - * - * for spinlock operations, try to grab nesting acquisition of - * spinlock will cause dead-lock in MP system and current irql - * overwritten for UP system. (UP system could allow nesting spin - * acqisition, because it's not spin at all just raising the irql.) - * - */ - -typedef struct spin_lock { - - KSPIN_LOCK lock; - KIRQL irql; - -} spinlock_t; - - -#define CFS_DECL_SPIN(name) spinlock_t name; -#define CFS_DECL_SPIN_EXTERN(name) extern spinlock_t name; - - -static inline void spin_lock_init(spinlock_t *lock) -{ - KeInitializeSpinLock(&(lock->lock)); -} - - -static inline void spin_lock(spinlock_t *lock) -{ - KeAcquireSpinLock(&(lock->lock), &(lock->irql)); -} - -static inline void spin_unlock(spinlock_t *lock) -{ - KIRQL irql = lock->irql; - KeReleaseSpinLock(&(lock->lock), irql); -} - - -#define spin_lock_irqsave(lock, flags) do {(flags) = 0; spin_lock(lock);} while(0) -#define spin_unlock_irqrestore(lock, flags) do {spin_unlock(lock);} while(0) - - -/* There's no corresponding routine in windows kernel. 
- We must realize a light one of our own. But there's - no way to identify the system is MP build or UP build - on the runtime. We just uses a workaround for it. */ - -extern int MPSystem; - -static int spin_trylock(spinlock_t *lock) -{ - KIRQL Irql; - int rc = 0; - - ASSERT(lock != NULL); - - KeRaiseIrql(DISPATCH_LEVEL, &Irql); - - if (MPSystem) { - if (0 == (ulong_ptr)lock->lock) { -#if _X86_ - __asm { - mov edx, dword ptr [ebp + 8] - lock bts dword ptr[edx], 0 - jb lock_failed - mov rc, TRUE - lock_failed: - } -#else - KdBreakPoint(); -#endif - - } - } else { - rc = TRUE; - } - - if (rc) { - lock->irql = Irql; - } else { - KeLowerIrql(Irql); - } - - return rc; -} - -/* synchronization between cpus: it will disable all DPCs - kernel task scheduler on the CPU */ -#define spin_lock_bh(x) spin_lock(x) -#define spin_unlock_bh(x) spin_unlock(x) -#define spin_lock_bh_init(x) spin_lock_init(x) - -/* - * rw_semaphore (using ERESOURCE) - */ - - -typedef struct rw_semaphore { - ERESOURCE rwsem; -} rw_semaphore_t; - - -#define CFS_DECL_RWSEM(name) rw_semaphore_t name -#define CFS_DECL_RWSEM_EXTERN(name) extern rw_semaphore_t name - - -/* - * init_rwsem - * To initialize the the rw_semaphore_t structure - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void init_rwsem(rw_semaphore_t *s) -{ - ExInitializeResourceLite(&s->rwsem); -} - - -/* - * fini_rwsem - * To finilize/destroy the the rw_semaphore_t structure - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * For winnt system, we need this routine to delete the ERESOURCE. - * Just define it NULL for other systems. 
- */ - -static inline void fini_rwsem(rw_semaphore_t *s) -{ - ExDeleteResourceLite(&s->rwsem); -} - -/* - * down_read - * To acquire read-lock of the rw_semahore - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void down_read(struct rw_semaphore *s) -{ - ExAcquireResourceSharedLite(&s->rwsem, TRUE); -} - - -/* - * down_read_trylock - * To acquire read-lock of the rw_semahore without blocking - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * Zero: failed to acquire the read lock - * Non-Zero: succeeded to acquire the read lock - * - * Notes: - * This routine will return immediately without waiting. - */ - -static inline int down_read_trylock(struct rw_semaphore *s) -{ - return ExAcquireResourceSharedLite(&s->rwsem, FALSE); -} - - -/* - * down_write - * To acquire write-lock of the rw_semahore - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void down_write(struct rw_semaphore *s) -{ - ExAcquireResourceExclusiveLite(&(s->rwsem), TRUE); -} - - -/* - * down_write_trylock - * To acquire write-lock of the rw_semahore without blocking - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * Zero: failed to acquire the write lock - * Non-Zero: succeeded to acquire the read lock - * - * Notes: - * This routine will return immediately without waiting. 
- */ - -static inline int down_write_trylock(struct rw_semaphore *s) -{ - return ExAcquireResourceExclusiveLite(&(s->rwsem), FALSE); -} - - -/* - * up_read - * To release read-lock of the rw_semahore - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void up_read(struct rw_semaphore *s) -{ - ExReleaseResourceForThreadLite( - &(s->rwsem), - ExGetCurrentResourceThread()); -} - - -/* - * up_write - * To release write-lock of the rw_semahore - * - * Arguments: - * rwsem: pointer to the rw_semaphore_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void up_write(struct rw_semaphore *s) -{ - ExReleaseResourceForThreadLite( - &(s->rwsem), - ExGetCurrentResourceThread()); -} - -/* - * rwlock_t (using sempahore) - * - * - rwlock_init(x) - * - read_lock(x) - * - read_unlock(x) - * - write_lock(x) - * - write_unlock(x) - */ - -typedef struct { - spinlock_t guard; - int count; -} rwlock_t; - -void rwlock_init(rwlock_t * rwlock); -void rwlock_fini(rwlock_t * rwlock); - -void read_lock(rwlock_t * rwlock); -void read_unlock(rwlock_t * rwlock); -void write_lock(rwlock_t * rwlock); -void write_unlock(rwlock_t * rwlock); - -#define write_lock_irqsave(l, f) do {f = 0; write_lock(l);} while(0) -#define write_unlock_irqrestore(l, f) do {write_unlock(l);} while(0) -#define read_lock_irqsave(l, f) do {f=0; read_lock(l);} while(0) -#define read_unlock_irqrestore(l, f) do {read_unlock(l);} while(0) - - -/* - * Semaphore - * - * - sema_init(x, v) - * - __down(x) - * - __up(x) - */ - -typedef struct semaphore { - KSEMAPHORE sem; -} mutex_t; - -static inline void sema_init(struct semaphore *s, int val) -{ - KeInitializeSemaphore(&s->sem, val, val); -} - -static inline void __down(struct semaphore *s) -{ - KeWaitForSingleObject( &(s->sem), Executive, - KernelMode, FALSE, NULL ); - -} - -static inline void __up(struct semaphore *s) -{ - KeReleaseSemaphore(&s->sem, 
0, 1, FALSE); -} - -/* - * mutex_t: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ - - -/* - * init_mutex - * To initialize a mutex_t structure - * - * Arguments: - * mutex: pointer to the mutex_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void init_mutex(mutex_t *mutex) -{ - sema_init(mutex, 1); -} - - -/* - * mutex_down - * To acquire the mutex lock - * - * Arguments: - * mutex: pointer to the mutex_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void mutex_down(mutex_t *mutex) -{ - __down(mutex); -} - - -/* - * mutex_up - * To release the mutex lock (acquired already) - * - * Arguments: - * mutex: pointer to the mutex_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void mutex_up(mutex_t *mutex) -{ - __up(mutex); -} - - -/* - * init_mutex_locked - * To initialize the mutex as acquired state - * - * Arguments: - * mutex: pointer to the mutex_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline init_mutex_locked(mutex_t *mutex) -{ - init_mutex(mutex); - mutex_down(mutex); -} - -/* - * completion - * - * - init_complition(c) - * - complete(c) - * - wait_for_completion(c) - */ - -struct completion { - event_t event; -}; - - -/* - * init_completion - * To initialize the completion object - * - * Arguments: - * c: pointer to the completion structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void init_completion(struct completion *c) -{ - cfs_init_event(&(c->event), 1, FALSE); -} - - -/* - * complete - * To complete/signal the completion object - * - * Arguments: - * c: pointer to the completion structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void complete(struct completion *c) -{ - cfs_wake_event(&(c->event)); -} - -/* - * wait_for_completion - * To wait on the completion object. 
If the event is signaled, - * this function will return to the call with the event un-singled. - * - * Arguments: - * c: pointer to the completion structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -static inline void wait_for_completion(struct completion *c) -{ - cfs_wait_event(&(c->event), 0); -} - -/* __KERNEL__ */ -#else - -#include "../user-lock.h" - -/* __KERNEL__ */ -#endif -#endif diff --git a/lnet/include/libcfs/winnt/winnt-mem.h b/lnet/include/libcfs/winnt/winnt-mem.h deleted file mode 100644 index b7f00a4165a1fa5051c21c31836726a162e69919..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-mem.h +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines of memory manipulation routines . - * - */ - -#ifndef __LIBCFS_WINNT_CFS_MEM_H__ -#define __LIBCFS_WINNT_CFS_MEM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -#ifdef __KERNEL__ - -#define CFS_PAGE_SIZE PAGE_SIZE -#define CFS_PAGE_SHIFT PAGE_SHIFT -#define CFS_PAGE_MASK (~(PAGE_SIZE - 1)) - -typedef struct cfs_page { - void * addr; - atomic_t count; -} cfs_page_t; - - -cfs_page_t *cfs_alloc_page(int flags); -void cfs_free_page(cfs_page_t *pg); - -static inline void *cfs_page_address(cfs_page_t *page) -{ - return page->addr; -} - -static inline void *cfs_kmap(cfs_page_t *page) -{ - return page->addr; -} - -static inline void cfs_kunmap(cfs_page_t *page) -{ - return; -} - -static inline void cfs_get_page(cfs_page_t *page) -{ - atomic_inc(&page->count); -} - -static inline void cfs_put_page(cfs_page_t *page) -{ - atomic_dec(&page->count); -} - -static inline int cfs_page_count(cfs_page_t *page) -{ - return atomic_read(&page->count); -} - -/* - * Memory allocator - */ - -#define CFS_ALLOC_ATOMIC_TRY (0) - -extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); -extern void cfs_free(void *addr); - -extern void *cfs_alloc_large(size_t nr_bytes); -extern void cfs_free_large(void *addr); - -/* - * SLAB allocator - */ - -#define SLAB_HWCACHE_ALIGN 0 - -/* The cache name is limited to 20 chars */ - -typedef struct cfs_mem_cache { - - char name[20]; - ulong_ptr flags; - NPAGED_LOOKASIDE_LIST npll; - -} cfs_mem_cache_t; - - -extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, ulong_ptr); -extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); -extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); -extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); - - -/* - * Page allocator slabs - */ - -extern cfs_mem_cache_t *cfs_page_t_slab; -extern cfs_mem_cache_t *cfs_page_p_slab; - - -#define CFS_DECL_MMSPACE -#define CFS_MMSPACE_OPEN do {} while(0) -#define CFS_MMSPACE_CLOSE do {} while(0) - - -#define mb() do {} while(0) -#define rmb() mb() -#define wmb() mb() - - -/* __KERNEL__ */ -#endif - -#endif /* __WINNT_CFS_MEM_H__ */ diff --git 
a/lnet/include/libcfs/winnt/winnt-prim.h b/lnet/include/libcfs/winnt/winnt-prim.h deleted file mode 100644 index 3c8560b71c952f95e5aacf4404100ce68136840c..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-prim.h +++ /dev/null @@ -1,1082 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef __LIBCFS_WINNT_CFS_PRIM_H__ -#define __LIBCFS_WINNT_CFS_PRIM_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - - -/* - * libcfs proc device object - */ - - -#define LUSTRE_PROC_DEVICE L"\\Device\\lproc" /* proc fs emulator device object */ -#define LUSTRE_PROC_SYMLNK L"\\DosDevices\\lproc" /* proc fs user-visible device */ - - -/* - * Device IO Control Code Definitions - */ - -#define FILE_DEVICE_LIBCFS ('LC') - -#define FILE_DEVICE_LIBCFS ('LC') - -#define FUNC_LIBCFS_VERSION 0x101 // get version of current libcfs -#define FUNC_LIBCFS_IOCTL 0x102 // Device i/o control to proc fs - - -#define IOCTL_LIBCFS_VERSION \ - CTL_CODE (FILE_DEVICE_LIBCFS, FUNC_LIBCFS_VERSION, METHOD_BUFFERED, FILE_ANY_ACCESS) -#define IOCTL_LIBCFS_ENTRY \ - CTL_CODE(FILE_DEVICE_LIBCFS, FUNC_LIBCFS_IOCTL, METHOD_BUFFERED, FILE_ANY_ACCESS) - -#pragma pack(4) - -typedef struct _CFS_PROC_IOCTL { - - ULONG cmd; // ioctl command identifier - ULONG len; // length of data - - // UCHAR data[]; // content of the real ioctl - -} CFS_PROC_IOCTL, *PCFS_PROC_IOCTL; - -#pragma pack() - -#ifdef __KERNEL__ - -#include <libcfs/list.h> - -/* - * Symbol functions for libcfs - * - * OSX has no facility for use to register symbol. - * So we have to implement it. 
- */ -#define CFS_SYMBOL_LEN 64 - -struct cfs_symbol { - char name[CFS_SYMBOL_LEN]; - void *value; - int ref; - struct list_head sym_list; -}; - -extern int cfs_symbol_register(const char *, const void *); -extern void cfs_symbol_unregister(const char *); -extern void * cfs_symbol_get(const char *); -extern void cfs_symbol_put(const char *); -extern void cfs_symbol_clean(); - - - -typedef struct file_operations cfs_file_operations_t; -typedef struct file cfs_file_t; - -/* - * Pseudo device register - */ - -typedef struct -{ - int minor; - const char * name; - cfs_file_operations_t * fops; -} cfs_psdev_t; - -int cfs_psdev_register(cfs_psdev_t * psdev); -int cfs_psdev_deregister(cfs_psdev_t * psdev); - - -/* - * Proc emulator file system APIs - */ - -typedef int cfs_read_proc_t(char *page, char **start, off_t off, - int count, int *eof, void *data); -typedef int cfs_write_proc_t(struct file *file, const char *buffer, - ulong_ptr count, void *data); - -#define CFS_PROC_ENTRY_MAGIC 'CPEM' - -#define CFS_PROC_FLAG_DIRECTORY 0x00000001 // directory node -#define CFS_PROC_FLAG_ATTACHED 0x00000002 // node is attached to proc -#define CFS_PROC_FLAG_MISCDEV 0x00000004 // miscellaneous device - -typedef struct cfs_proc_entry -{ - ULONG magic; // Magic - ULONG flags; // Flags - - struct _dir_entry { // proc directory entry - PRTL_SPLAY_LINKS root; - }; - - struct _file_entry { // proc file / leaf entry - cfs_read_proc_t * read_proc; - cfs_write_proc_t * write_proc; - }; - - mode_t mode; - unsigned short nlink; - - - struct file_operations * proc_fops; - void * data; - - // proc_dir_entry ended. 
- - RTL_SPLAY_LINKS s_link; // splay link - - // - // Maximum length of proc entry name is 0x20 - // - - char name[0x20]; - -} cfs_proc_entry_t, cfs_proc_dir_entry_t; - -typedef cfs_proc_entry_t cfs_proc_dir_entry_t; - -#define PROC_BLOCK_SIZE PAGE_SIZE - -/* - * Sysctl register - */ - -typedef struct ctl_table cfs_sysctl_table_t; -typedef struct ctl_table_header cfs_sysctl_table_header_t; - - -typedef int ctl_handler ( - cfs_sysctl_table_t *table, - int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, - void **context ); - -typedef int proc_handler ( - cfs_sysctl_table_t *ctl, - int write, struct file * filp, - void *buffer, size_t *lenp ); - - -int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp); - -int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp); - -int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context); - - -/* - * System io control definitions - */ - -#define CTL_MAXNAME 10 - -#define CTL_ANY -1 /* Matches any name */ -#define CTL_NONE 0 - -enum -{ - CTL_KERN=1, /* General kernel info and control */ - CTL_VM=2, /* VM management */ - CTL_NET=3, /* Networking */ - CTL_PROC=4, /* Process info */ - CTL_FS=5, /* Filesystems */ - CTL_DEBUG=6, /* Debugging */ - CTL_DEV=7, /* Devices */ - CTL_BUS=8, /* Busses */ - CTL_ABI=9, /* Binary emulation */ - CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ -}; - -/* sysctl table definitons */ -struct ctl_table -{ - int ctl_name; - char *procname; - void *data; - int maxlen; - mode_t mode; - cfs_sysctl_table_t *child; - proc_handler *proc_handler; /* text formatting callback */ - ctl_handler *strategy; /* read / write callback functions */ - cfs_proc_entry_t *de; /* proc entry block */ - void *extra1; - void *extra2; -}; - - -/* the mantaner of the cfs_sysctl_table trees */ -struct ctl_table_header 
-{ - cfs_sysctl_table_t * ctl_table; - struct list_head ctl_entry; -}; - - -cfs_proc_entry_t * create_proc_entry(char *name, mode_t mod, - cfs_proc_entry_t *parent); -void proc_free_entry(cfs_proc_entry_t *de); -void remove_proc_entry(char *name, cfs_proc_entry_t *entry); -cfs_proc_entry_t * search_proc_entry(char * name, - cfs_proc_entry_t * root ); - -#define cfs_create_proc_entry create_proc_entry -#define cfs_free_proc_entry proc_free_entry -#define cfs_remove_proc_entry remove_proc_entry - -#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a) -#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t, a) - - -/* - * declaration of proc kernel process routines - */ - -cfs_file_t * -lustre_open_file(char * filename); - -int -lustre_close_file(cfs_file_t * fh); - -int -lustre_do_ioctl( cfs_file_t * fh, - unsigned long cmd, - ulong_ptr arg ); - -int -lustre_ioctl_file( cfs_file_t * fh, - PCFS_PROC_IOCTL devctl); - -size_t -lustre_read_file( cfs_file_t * fh, - loff_t off, - size_t size, - char * buf - ); - -size_t -lustre_write_file( cfs_file_t * fh, - loff_t off, - size_t size, - char * buf - ); - -/* - * Wait Queue - */ - - -typedef int cfs_task_state_t; - -#define CFS_TASK_INTERRUPTIBLE 0x00000001 -#define CFS_TASK_UNINT 0x00000002 - - - -#define CFS_WAITQ_MAGIC 'CWQM' -#define CFS_WAITLINK_MAGIC 'CWLM' - -typedef struct cfs_waitq { - - unsigned int magic; - unsigned int flags; - - spinlock_t guard; - struct list_head waiters; - -} cfs_waitq_t; - - -typedef struct cfs_waitlink cfs_waitlink_t; - -#define CFS_WAITQ_CHANNELS (2) - -#define CFS_WAITQ_CHAN_NORMAL (0) -#define CFS_WAITQ_CHAN_FORWARD (1) - - - -typedef struct cfs_waitlink_channel { - struct list_head link; - cfs_waitq_t * waitq; - cfs_waitlink_t * waitl; -} cfs_waitlink_channel_t; - -struct cfs_waitlink { - - unsigned int magic; - int flags; - event_t * event; - atomic_t * hits; - - cfs_waitlink_channel_t waitq[CFS_WAITQ_CHANNELS]; -}; - -enum { - CFS_WAITQ_EXCLUSIVE = 1 -}; - 
-#define CFS_DECL_WAITQ(name) cfs_waitq_t name - - -void cfs_waitq_init(struct cfs_waitq *waitq); -void cfs_waitlink_init(struct cfs_waitlink *link); - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link); -void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); -int cfs_waitq_active(struct cfs_waitq *waitq); - -void cfs_waitq_signal(struct cfs_waitq *waitq); -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); -void cfs_waitq_broadcast(struct cfs_waitq *waitq); - -void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state); -cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, - cfs_task_state_t state, cfs_duration_t timeout); - - - -/* Kernel thread */ - -typedef int (*cfs_thread_t) (void *arg); - -typedef struct _cfs_thread_context { - cfs_thread_t func; - void * arg; -} cfs_thread_context_t; - -int cfs_kernel_thread(int (*func)(void *), void *arg, int flag); - -/* - * thread creation flags from Linux, not used in winnt - */ -#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ -#define CLONE_VM 0x00000100 /* set if VM shared between processes */ -#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ -#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ -#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ -#define CLONE_PID 0x00001000 /* set if pid shared */ -#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ -#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ -#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ -#define CLONE_THREAD 0x00010000 /* Same thread group? 
*/ -#define CLONE_NEWNS 0x00020000 /* New namespace group? */ - -#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) - - -/* - * sigset ... - */ - -typedef sigset_t cfs_sigset_t; - -/* - * Task struct - */ - -#define MAX_SCHEDULE_TIMEOUT ((long_ptr)(~0UL>>12)) - - -#define NGROUPS 1 -#define CFS_CURPROC_COMM_MAX (16) -typedef struct task_sruct{ - mode_t umask; - - pid_t pid; - pid_t pgrp; - - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - - int ngroups; - gid_t groups[NGROUPS]; - cfs_kernel_cap_t cap_effective, - cap_inheritable, - cap_permitted; - - char comm[CFS_CURPROC_COMM_MAX]; - void * journal_info; -} cfs_task_t; - - -/* - * linux task struct emulator ... - */ - -#define TASKMAN_MAGIC 'TMAN' /* Task Manager */ -#define TASKSLT_MAGIC 'TSLT' /* Task Slot */ - -typedef struct _TASK_MAN { - - ULONG Magic; /* Magic and Flags */ - ULONG Flags; - - spinlock_t Lock; /* Protection lock */ - - cfs_mem_cache_t * slab; /* Memory slab for task slot */ - - ULONG NumOfTasks; /* Total tasks (threads) */ - LIST_ENTRY TaskList; /* List of task slots */ - -} TASK_MAN, *PTASK_MAN; - -typedef struct _TASK_SLOT { - - ULONG Magic; /* Magic and Flags */ - ULONG Flags; - - LIST_ENTRY Link; /* To be linked to TaskMan */ - - event_t Event; /* Schedule event */ - - HANDLE Pid; /* Process id */ - HANDLE Tid; /* Thread id */ - PETHREAD Tet; /* Pointer to ethread */ - - atomic_t count; /* refer count */ - atomic_t hits; /* times of waken event singaled */ - - KIRQL irql; /* irql for rwlock ... 
*/ - - cfs_task_t task; /* linux task part */ - -} TASK_SLOT, *PTASK_SLOT; - - -#define current cfs_current() -#define set_current_state(s) do {;} while (0) - -#define wait_event(wq, condition) \ -do { \ - cfs_waitlink_t __wait; \ - \ - cfs_waitlink_init(&__wait); \ - while (TRUE) { \ - cfs_waitq_add(&wq, &__wait); \ - if (condition) { \ - break; \ - } \ - cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ - cfs_waitq_del(&wq, &__wait); \ - } \ - cfs_waitq_del(&wq, &__wait); \ -} while(0) - -#define wait_event_interruptible(wq, condition, __ret) \ -do { \ - cfs_waitlink_t __wait; \ - \ - __ret = 0; \ - cfs_waitlink_init(&__wait); \ - while (TRUE) { \ - cfs_waitq_add(&wq, &__wait); \ - if (condition) { \ - break; \ - } \ - cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ - cfs_waitq_del(&wq, &__wait); \ - } \ - cfs_waitq_del(&wq, &__wait); \ -} while(0) - - -int init_task_manager(); -void cleanup_task_manager(); -cfs_task_t * cfs_current(); -int schedule_timeout(int64_t time); -int schedule(); -int wake_up_process(cfs_task_t * task); -#define cfs_schedule_timeout(state, time) schedule_timeout(time) -void sleep_on(cfs_waitq_t *waitq); - -#define CFS_DECL_JOURNAL_DATA -#define CFS_PUSH_JOURNAL do {;} while(0) -#define CFS_POP_JOURNAL do {;} while(0) - - -/* module related definitions */ - -#ifndef __exit -#define __exit -#endif -#ifndef __init -#define __init -#endif - -#define request_module(x) (0) - -#define EXPORT_SYMBOL(s) -#define MODULE_AUTHOR(s) -#define MODULE_DESCRIPTION(s) -#define MODULE_LICENSE(s) -#define MODULE_PARM(a, b) -#define MODULE_PARM_DESC(a, b) - -#define module_init(X) int __init module_##X() {return X();} -#define module_exit(X) void __exit module_##X() {X();} - -#define DECLARE_INIT(X) extern int __init module_##X(void) -#define DECLARE_EXIT(X) extern void __exit module_##X(void) - -#define MODULE_INIT(X) do { int rc = module_##X(); \ - if (rc) goto errorout; \ - } while(0) - -#define MODULE_EXIT(X) do { module_##X(); } while(0) - - -/* 
Module interfaces */ -#define cfs_module(name, version, init, fini) \ -module_init(init); \ -module_exit(fini) - - -/* - * Linux kernel version definition - */ - -#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) -#define LINUX_VERSION_CODE (2*100+6*10+7) - - -/* - * Signal - */ -#define SIGNAL_MASK_ASSERT() - -/* - * Timer - */ - -#define CFS_TIMER_FLAG_INITED 0x00000001 // Initialized already -#define CFS_TIMER_FLAG_TIMERED 0x00000002 // KeSetTimer is called - -typedef struct cfs_timer { - - KSPIN_LOCK Lock; - - ULONG Flags; - - KDPC Dpc; - KTIMER Timer; - - cfs_time_t deadline; - - void (*proc)(ulong_ptr); - void * arg; - -} cfs_timer_t; - - -typedef void (*timer_func_t)(ulong_ptr); - -#define cfs_init_timer(t) - -void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr), void *arg); -void cfs_timer_done(cfs_timer_t *t); -void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline); -void cfs_timer_disarm(cfs_timer_t *t); -int cfs_timer_is_armed(cfs_timer_t *t); -cfs_time_t cfs_timer_deadline(cfs_timer_t *t); - - -/* deschedule for a bit... */ -static inline void cfs_pause(cfs_duration_t ticks) -{ - cfs_schedule_timeout(TASK_UNINTERRUPTIBLE, ticks); -} - - -static inline void cfs_enter_debugger(void) -{ -#if _X86_ - __asm int 3; -#else - KdBreakPoint(); -#endif -} - -/* - * libcfs globals initialization/cleanup - */ - -int -libcfs_arch_init(void); - -void -libcfs_arch_cleanup(void); - -/* - * SMP ... 
- */ - -#define SMP_CACHE_BYTES 128 -#define __cacheline_aligned -#define NR_CPUS (2) -#define smp_processor_id() KeGetCurrentProcessorNumber() -#define smp_num_cpus NR_CPUS -#define num_online_cpus() smp_num_cpus -#define smp_call_function(f, a, n, w) do {} while(0) - -/* - * Irp related - */ - -#define NR_IRQS 512 -#define in_interrupt() (0) - -/* - * printk flags - */ - -#define KERN_EMERG "<0>" /* system is unusable */ -#define KERN_ALERT "<1>" /* action must be taken immediately */ -#define KERN_CRIT "<2>" /* critical conditions */ -#define KERN_ERR "<3>" /* error conditions */ -#define KERN_WARNING "<4>" /* warning conditions */ -#define KERN_NOTICE "<5>" /* normal but significant condition */ -#define KERN_INFO "<6>" /* informational */ -#define KERN_DEBUG "<7>" /* debug-level messages */ - -/* - * Misc - */ - - -#define inter_module_get(n) cfs_symbol_get(n) -#define inter_module_put(n) cfs_symbol_put(n) - -#ifndef likely -#define likely(exp) (exp) -#endif -#ifndef unlikely -#define unlikely(exp) (exp) -#endif - -#define lock_kernel() do {} while(0) -#define unlock_kernel() do {} while(0) - -#define CAP_SYS_ADMIN 0 -#define CAP_SYS_ROOT 1 - -#define capable(a) (TRUE) - -#define USERMODEHELPER(path, argv, envp) (0) - - -#define local_irq_save(x) -#define local_irq_restore(x) - -#define cfs_assert ASSERT - -#define THREAD_NAME - -#else /* !__KERNEL__ */ - -#define PAGE_CACHE_SIZE PAGE_SIZE -#define PAGE_CACHE_MASK PAGE_MASK - -#define getpagesize() (PAGE_SIZE) - - -typedef struct { - int foo; -} pthread_mutex_t; - -typedef struct { - int foo; -} pthread_cond_t; - -#define pthread_mutex_init(x, y) do {} while(0) -#define pthread_cond_init(x, y) do {} while(0) - -#define pthread_mutex_lock(x) do {} while(0) -#define pthread_mutex_unlock(x) do {} while(0) - -#define pthread_cond_wait(x,y) do {} while(0) -#define pthread_cond_broadcast(x) do {} while(0) - -typedef struct file { - int foo; -} cfs_file_t; - -typedef struct cfs_proc_dir_entry{ - void *data; 
-}cfs_proc_dir_entry_t; - - - -#include "../user-prim.h" - -#include <sys/stat.h> -#include <sys/types.h> - -#define strcasecmp strcmp -#define strncasecmp strncmp -#define snprintf _snprintf -#define getpid() (0) - - -#define getpwuid(x) (NULL) -#define getgrgid(x) (NULL) - -int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev); - -int gethostname(char * name, int namelen); - -#define setlinebuf(x) do {} while(0) - - -NTSYSAPI VOID NTAPI DebugBreak(); - - -static inline void cfs_enter_debugger(void) -{ -#if _X86_ - __asm int 3; -#else - DebugBreak(); -#endif -} - -/* Maximum EA Information Length */ -#define EA_MAX_LENGTH (sizeof(FILE_FULL_EA_INFORMATION) + 15) - - -/* - * proc user mode routines - */ - -HANDLE cfs_proc_open (char * filename, int oflag); -int cfs_proc_close(HANDLE handle); -int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count); -int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count); -int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer); - - -/* - * Native API definitions - */ - -// -// Disk I/O Routines -// - -NTSYSAPI -NTSTATUS -NTAPI -NtReadFile(HANDLE FileHandle, - HANDLE Event OPTIONAL, - PIO_APC_ROUTINE ApcRoutine OPTIONAL, - PVOID ApcContext OPTIONAL, - PIO_STATUS_BLOCK IoStatusBlock, - PVOID Buffer, - ULONG Length, - PLARGE_INTEGER ByteOffset OPTIONAL, - PULONG Key OPTIONAL); - -NTSYSAPI -NTSTATUS -NTAPI -NtWriteFile(HANDLE FileHandle, - HANDLE Event OPTIONAL, - PIO_APC_ROUTINE ApcRoutine OPTIONAL, - PVOID ApcContext OPTIONAL, - PIO_STATUS_BLOCK IoStatusBlock, - PVOID Buffer, - ULONG Length, - PLARGE_INTEGER ByteOffset OPTIONAL, - PULONG Key OPTIONAL); - -NTSYSAPI -NTSTATUS -NTAPI -NtClose(HANDLE Handle); - -NTSYSAPI -NTSTATUS -NTAPI -NtCreateFile(PHANDLE FileHandle, - ACCESS_MASK DesiredAccess, - POBJECT_ATTRIBUTES ObjectAttributes, - PIO_STATUS_BLOCK IoStatusBlock, - PLARGE_INTEGER AllocationSize OPTIONAL, - ULONG FileAttributes, - ULONG ShareAccess, - ULONG CreateDisposition, - ULONG CreateOptions, - 
PVOID EaBuffer OPTIONAL, - ULONG EaLength); - - -NTSYSAPI -NTSTATUS -NTAPI -NtDeviceIoControlFile( - IN HANDLE FileHandle, - IN HANDLE Event, - IN PIO_APC_ROUTINE ApcRoutine, - IN PVOID ApcContext, - OUT PIO_STATUS_BLOCK IoStatusBlock, - IN ULONG IoControlCode, - IN PVOID InputBuffer, - IN ULONG InputBufferLength, - OUT PVOID OutputBuffer, - OUT ULONG OutputBufferLength - ); - -NTSYSAPI -NTSTATUS -NTAPI -NtFsControlFile( - IN HANDLE FileHandle, - IN HANDLE Event OPTIONAL, - IN PIO_APC_ROUTINE ApcRoutine OPTIONAL, - IN PVOID ApcContext OPTIONAL, - OUT PIO_STATUS_BLOCK IoStatusBlock, - IN ULONG FsControlCode, - IN PVOID InputBuffer OPTIONAL, - IN ULONG InputBufferLength, - OUT PVOID OutputBuffer OPTIONAL, - IN ULONG OutputBufferLength -); - - -NTSYSAPI -NTSTATUS -NTAPI -NtQueryInformationFile( - IN HANDLE FileHandle, - OUT PIO_STATUS_BLOCK IoStatusBlock, - OUT PVOID FileInformation, - IN ULONG Length, - IN FILE_INFORMATION_CLASS FileInformationClass - ); - -// -// Random routines ... -// - -NTSYSAPI -ULONG -NTAPI -RtlRandom( - IN OUT PULONG Seed - ); - -#endif /* __KERNEL__ */ - - -// -// Inode flags (Linux uses octad number, but why ? strange!!!) 
-// - -#undef S_IFMT -#undef S_IFDIR -#undef S_IFCHR -#undef S_IFREG -#undef S_IREAD -#undef S_IWRITE -#undef S_IEXEC - -#define S_IFMT 0x0F000 /* 017 0000 */ -#define S_IFSOCK 0x0C000 /* 014 0000 */ -#define S_IFLNK 0x0A000 /* 012 0000 */ -#define S_IFREG 0x08000 /* 010 0000 */ -#define S_IFBLK 0x06000 /* 006 0000 */ -#define S_IFDIR 0x04000 /* 004 0000 */ -#define S_IFCHR 0x02000 /* 002 0000 */ -#define S_IFIFO 0x01000 /* 001 0000 */ -#define S_ISUID 0x00800 /* 000 4000 */ -#define S_ISGID 0x00400 /* 000 2000 */ -#define S_ISVTX 0x00200 /* 000 1000 */ - -#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) -#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) -#define S_ISFIL(m) (((m) & S_IFMT) == S_IFFIL) -#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) -#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) -#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) -#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) - -#define S_IPERMISSION_MASK 0x1FF /* */ - -#define S_IRWXU 0x1C0 /* 0 0700 */ -#define S_IRUSR 0x100 /* 0 0400 */ -#define S_IWUSR 0x080 /* 0 0200 */ -#define S_IXUSR 0x040 /* 0 0100 */ - -#define S_IRWXG 0x038 /* 0 0070 */ -#define S_IRGRP 0x020 /* 0 0040 */ -#define S_IWGRP 0x010 /* 0 0020 */ -#define S_IXGRP 0x008 /* 0 0010 */ - -#define S_IRWXO 0x007 /* 0 0007 */ -#define S_IROTH 0x004 /* 0 0004 */ -#define S_IWOTH 0x002 /* 0 0002 */ -#define S_IXOTH 0x001 /* 0 0001 */ - -#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) -#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) -#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH) -#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH) -#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) - -/* - * linux ioctl coding definitions - */ - -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 14 -#define _IOC_DIRBITS 2 - -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - 
-#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits. - */ -#define _IOC_NONE 0U -#define _IOC_WRITE 1U -#define _IOC_READ 2U - -#define _IOC(dir,type,nr,size) \ - (((dir) << _IOC_DIRSHIFT) | \ - ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | \ - ((size) << _IOC_SIZESHIFT)) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode ioctl numbers.. */ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* - * Io vector ... - */ - -struct iovec -{ - void *iov_base; - size_t iov_len; -}; - - -#define ULONG_LONG_MAX ((__u64)(0xFFFFFFFFFFFFFFFF)) -/* - * Convert a string to an unsigned long long integer. - * - * Ignores `locale' stuff. Assumes that the upper and lower case - * alphabets and digits are each contiguous. - */ -static inline __u64 -strtoull( - char *nptr, - char **endptr, - int base) -{ - char *s = nptr; - __u64 acc, cutoff; - int c, neg = 0, any, cutlim; - - /* - * See strtol for comments as to the logic used. - */ - do { - c = *s++; - } while (isspace(c)); - if (c == '-') { - neg = 1; - c = *s++; - } else if (c == '+') - c = *s++; - if ((base == 0 || base == 16) && - c == '0' && (*s == 'x' || *s == 'X')) { - c = s[1]; - s += 2; - base = 16; - } - if (base == 0) - base = c == '0' ? 
8 : 10; - cutoff = (__u64)ULONG_LONG_MAX / (__u64)base; - cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base); - for (acc = 0, any = 0;; c = *s++) { - if (isdigit(c)) - c -= '0'; - else if (isalpha(c)) - c -= isupper(c) ? 'A' - 10 : 'a' - 10; - else - break; - if (c >= base) - break; - if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) - any = -1; - else { - any = 1; - acc *= base; - acc += c; - } - } - if (any < 0) { - acc = ULONG_LONG_MAX; - } else if (neg) - acc = 0 - acc; - if (endptr != 0) - *endptr = (char *) (any ? s - 1 : nptr); - return (acc); -} - -#endif diff --git a/lnet/include/libcfs/winnt/winnt-tcpip.h b/lnet/include/libcfs/winnt/winnt-tcpip.h deleted file mode 100644 index a988247c6b5bf9be9fe51c25380714809a2180e0..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-tcpip.h +++ /dev/null @@ -1,660 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for Winnt (kernel and user-level). - * - */ - -#ifndef __LIBCFS_WINNT_TCPIP_H__ -#define __LIBCFS_WINNT_TCPIP_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - - -#ifdef __KERNEL__ - -// -// ks definitions -// - -// iovec is defined in libcfs: winnt_prim.h -// lnetkiov_t is defined in lnet/types.h - -typedef struct socket ksock_tconn_t; -typedef struct socket cfs_socket_t; - -// completion notification callback routine - -typedef VOID (*ksock_schedule_cb)(struct socket*, int, void *, ulong_ptr); - -/* completion routine to update tx structure for async sending */ -typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr); - -// -// tdinal definitions -// - - -#if TDI_LIBCFS_DBG -#define KsPrint(X) KsPrintf X -#else -#define KsPrint(X) -#endif - - -// -// Socket Addresses Related ... -// - -#define INADDR_ANY (ULONG)0x00000000 -#define INADDR_LOOPBACK (ULONG)0x7f000001 -#define INADDR_BROADCAST (ULONG)0xffffffff -#define INADDR_NONE (ULONG)0xffffffff - -/* - * TCP / IP options - */ - -#define SOL_TCP 6 -#define SOL_UDP 17 - - -#define TL_INSTANCE 0 - -#define TCP_SOCKET_NODELAY 1 // disabling "Nagle" -#define TCP_SOCKET_KEEPALIVE 2 -#define TCP_SOCKET_OOBINLINE 3 -#define TCP_SOCKET_BSDURGENT 4 -#define TCP_SOCKET_ATMARK 5 -#define TCP_SOCKET_WINDOW 6 - - -/* Flags we can use with send/ and recv. - Added those for 1003.1g not all are supported yet - */ - -#define MSG_OOB 1 -#define MSG_PEEK 2 -#define MSG_DONTROUTE 4 -#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ -#define MSG_CTRUNC 8 -#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. 
for MTU */ -#define MSG_TRUNC 0x20 -#define MSG_DONTWAIT 0x40 /* Nonblocking io */ -#define MSG_EOR 0x80 /* End of record */ -#define MSG_WAITALL 0x100 /* Wait for a full request */ -#define MSG_FIN 0x200 -#define MSG_SYN 0x400 -#define MSG_CONFIRM 0x800 /* Confirm path validity */ -#define MSG_RST 0x1000 -#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ -#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ -#define MSG_MORE 0x8000 /* Sender will send more */ - -#define MSG_EOF MSG_FIN - - -// -// Maximum TRANSPORT_ADDRESS Length -// -// it must >= FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) -// + TDI_ADDRESS_LENGTH_IP -// -// I define it a little large and 16 bytes aligned to avoid possible overflow. -// - -#define MAX_ADDRESS_LENGTH (0x30) - - -// -// Maximum Listers Children Sockets -// - -#define MAX_CHILD_LISTENERS (4) - -// -// Maximum EA Information Length -// - -#define EA_MAX_LENGTH ( sizeof(FILE_FULL_EA_INFORMATION) - 1 + \ - TDI_TRANSPORT_ADDRESS_LENGTH + 1 + \ - MAX_ADDRESS_LENGTH ) - - -#define UDP_DEVICE_NAME L"\\Device\\Udp" -#define TCP_DEVICE_NAME L"\\Device\\Tcp" - - -/* - * TSDU definitions - */ - -#define TDINAL_TSDU_DEFAULT_SIZE (0x10000) - -#define KS_TSDU_MAGIC 'KSTD' - -#define KS_TSDU_ATTACHED 0x00000001 // Attached to the socket receive tsdu list - -typedef struct _KS_TSDU { - - ULONG Magic; - ULONG Flags; - - struct list_head Link; - - ULONG TotalLength; // Total size of KS_TSDU - - ULONG StartOffset; // Start offset of the first Tsdu unit - ULONG LastOffset; // End offset of the last Tsdu unit - -/* - union { - KS_TSDU_DAT[]; - KS_TSDU_BUF[]; - KS_TSDU_MDL[]; - } -*/ - -} KS_TSDU, *PKS_TSDU; - -#define TSDU_TYPE_BUF ((USHORT)0x5401) -#define TSDU_TYPE_DAT ((USHORT)0x5402) -#define TSDU_TYPE_MDL ((USHORT)0x5403) - -#define KS_TSDU_BUF_RECEIVING 0x0001 -typedef struct _KS_TSDU_BUF { - - USHORT TsduType; - USHORT TsduFlags; - - ULONG DataLength; - ULONG StartOffset; - - PVOID UserBuffer; - -} KS_TSDU_BUF, 
*PKS_TSDU_BUF; - -#define KS_TSDU_DAT_RECEIVING 0x0001 - -typedef struct _KS_TSDU_DAT { - - USHORT TsduType; - USHORT TsduFlags; - - ULONG DataLength; - ULONG StartOffset; - - ULONG TotalLength; - - UCHAR Data[1]; - -} KS_TSDU_DAT, *PKS_TSDU_DAT; - -#define KS_DWORD_ALIGN(x) (((x) + 0x03) & (~(0x03))) -#define KS_TSDU_STRU_SIZE(Len) (KS_DWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data))) - -typedef struct _KS_TSDU_MDL { - - USHORT TsduType; - USHORT TsduFlags; - - ULONG DataLength; - ULONG StartOffset; - - PMDL Mdl; - PVOID Descriptor; - -} KS_TSDU_MDL, *PKS_TSDU_MDL; - - -typedef struct _KS_TSDUMGR { - - struct list_head TsduList; - ULONG NumOfTsdu; - ULONG TotalBytes; - KEVENT Event; - -} KS_TSDUMGR, *PKS_TSDUMGR; - - -typedef struct _KS_CHAIN { - - KS_TSDUMGR Normal; - KS_TSDUMGR Expedited; - -} KS_CHAIN, *PKS_CHAIN; - - -#define TDINAL_SCHED_FACTOR (1) -#define CAN_BE_SCHED(Len, Limit) (Len >= ((Limit) >> TDINAL_SCHED_FACTOR)) - -// -// Handler Settings Indictor -// - -#define TDI_EVENT_MAXIMUM_HANDLER (TDI_EVENT_ERROR_EX + 1) - - -typedef struct _KS_EVENT_HANDLERS { - BOOLEAN IsActive[TDI_EVENT_MAXIMUM_HANDLER]; - PVOID Handler [TDI_EVENT_MAXIMUM_HANDLER]; -} KS_EVENT_HANDLERS, *PKS_EVENT_HANDLERS; - -#define SetEventHandler(ha, ht, hr) do { \ - ha.IsActive[ht] = TRUE; \ - ha.Handler[ht] = (PVOID) (hr); \ - } while(0) - -// -// KSock Internal Structures -// - -typedef struct _KS_ADDRESS { - - union { - TRANSPORT_ADDRESS Tdi; - UCHAR Pading[MAX_ADDRESS_LENGTH]; - }; - - HANDLE Handle; - PFILE_OBJECT FileObject; - -} KS_ADDRESS, *PKS_ADDRESS; - -// -// Structures for Disconnect Workitem -// - -typedef struct _KS_DISCONNECT_WORKITEM { - - WORK_QUEUE_ITEM WorkItem; // Workitem to perform disconnection - ksock_tconn_t * tconn; // tdi connecton - ULONG Flags; // connection broken/discnnection flags - KEVENT Event; // sync event - -} KS_DISCONNECT_WORKITEM, *PKS_DISCONNECT_WORKITEM; - - -typedef struct _KS_CONNECTION { - - HANDLE Handle; // Handle of the tdi 
connection - PFILE_OBJECT FileObject; // FileObject if the conn object - - PTRANSPORT_ADDRESS Remote; // the ConnectionInfo of this connection - PTDI_CONNECTION_INFORMATION ConnectionInfo; - - ULONG nagle; // Tcp options - -} KS_CONNECTION, *PKS_CONNECTION; - - -// -// type definitions -// - -typedef MDL ksock_mdl_t; -typedef UNICODE_STRING ksock_unicode_name_t; -typedef WORK_QUEUE_ITEM ksock_workitem_t; - - -typedef KS_CHAIN ksock_chain_t; -typedef KS_ADDRESS ksock_tdi_addr_t; -typedef KS_CONNECTION ksock_tconn_info_t; -typedef KS_DISCONNECT_WORKITEM ksock_disconnect_workitem_t; - - -// -// Structures for transmission done Workitem -// - -typedef struct _KS_TCPX_FINILIZE { - ksock_workitem_t item; - void * tx; -} ksock_tcpx_fini_t; - - -typedef struct ksock_backlogs { - - struct list_head list; /* list to link the backlog connections */ - int num; /* number of backlogs in the list */ - -} ksock_backlogs_t; - - -typedef struct ksock_daemon { - - ksock_tconn_t * tconn; /* the listener connection object */ - unsigned short nbacklogs; /* number of listening backlog conns */ - unsigned short port; /* listening port number */ - int shutdown; /* daemon threads is to exit */ - struct list_head list; /* to be attached into ksock_nal_data_t*/ - -} ksock_daemon_t ; - - -typedef enum { - - kstt_sender = 0, // normal sending connection type, it's active connection, while - // child tconn is for passive connection. - - kstt_listener, // listener daemon type, it just acts as a daemon, and it does - // not have real connection. It manages children tcons to accept - // or refuse the connecting request from remote peers. 
- - kstt_child, // accepted child connection type, it's parent must be Listener - kstt_lasttype -} ksock_tconn_type; - -typedef enum { - - ksts_uninited = 0, // tconn is just allocated (zero values), not initialized yet - - ksts_inited, // tconn structure initialized: so it now can be identified as - // a sender, listener or a child - - ksts_bind, // tconn is bound: the local address object (ip/port) is created. - // after being bound, we must call ksocknal_put_tconn to release - // the tconn objects, it's not safe just to free the memory of tconn. - - ksts_associated, // the connection object is created and associated with the address - // object. so it's ready for connection. only for child and sender. - - ksts_connecting, // only used by child tconn: in the ConnectEvent handler routine, - // it indicts the child tconn is busy to be connected to the peer. - - ksts_connected, // the connection is built already: for sender and child - - ksts_listening, // listener daemon is working, only for listener tconn - - ksts_disconnected, // disconnected by user - ksts_aborted, // un-exptected broken status - - ksts_last // total number of tconn statuses -} ksock_tconn_state; - -#define KS_TCONN_MAGIC 'KSTM' - -#define KS_TCONN_HANDLERS_SET 0x00000001 // Conection handlers are set. -#define KS_TCONN_DISCONNECT_BUSY 0x00010000 // Disconnect Workitem is queued ... -#define KS_TCONN_DESTROY_BUSY 0x00020000 // Destory Workitem is queued ... 
- -#define KS_TCONN_DAEMON_STARTED 0x00100000 // indict the daemon is started, - // only valid for listener - -struct socket { - - ulong_ptr kstc_magic; /* Magic & Flags */ - ulong_ptr kstc_flags; - - spinlock_t kstc_lock; /* serialise lock*/ - void * kstc_conn; /* ksock_conn_t */ - - ksock_tconn_type kstc_type; /* tdi connection Type */ - ksock_tconn_state kstc_state; /* tdi connection state flag */ - - ksock_unicode_name_t kstc_dev; /* tcp transport device name */ - - ksock_tdi_addr_t kstc_addr; /* local address handlers / Objects */ - - atomic_t kstc_refcount; /* reference count of ksock_tconn */ - - struct list_head kstc_list; /* linked to global ksocknal_data */ - - union { - - struct { - int nbacklog; /* total number of backlog tdi connections */ - ksock_backlogs_t kstc_listening; /* listeing backlog child connections */ - ksock_backlogs_t kstc_accepted; /* connected backlog child connections */ - event_t kstc_accept_event; /* Signaled by AcceptedHander, - ksocknal_wait_accpeted_conns waits on */ - event_t kstc_destroy_event; /* Signaled when accepted child is released */ - } listener; - - struct { - ksock_tconn_info_t kstc_info; /* Connection Info if Connected */ - ksock_chain_t kstc_recv; /* tsdu engine for data receiving */ - ksock_chain_t kstc_send; /* tsdu engine for data sending */ - - int kstc_queued; /* Attached to Parent->ChildList ... */ - int kstc_queueno; /* 0: Attached to Listening list - 1: Attached to Accepted list */ - - int kstc_busy; /* referred by ConnectEventCallback ? */ - int kstc_accepted; /* the connection is built ready ? 
*/ - - struct list_head kstc_link; /* linked to parent tdi connection */ - ksock_tconn_t * kstc_parent; /* pointers to it's listener parent */ - } child; - - struct { - ksock_tconn_info_t kstc_info; /* Connection Info if Connected */ - ksock_chain_t kstc_recv; /* tsdu engine for data receiving */ - ksock_chain_t kstc_send; /* tsdu engine for data sending */ - } sender; - }; - - ulong_ptr kstc_snd_wnd; /* Sending window size */ - ulong_ptr kstc_rcv_wnd; /* Recving window size */ - - ksock_workitem_t kstc_destroy; /* tconn destruction workitem */ - ksock_disconnect_workitem_t kstc_disconnect; /* connection disconnect workitem */ - - ksock_schedule_cb kstc_sched_cb; /* notification callback routine of completion */ - ksock_update_tx kstc_update_tx; /* aync sending callback to update tx */ -}; - -#define SOCK_WMEM_QUEUED(sock) (0) - -#define TDINAL_WINDOW_DEFAULT_SIZE (0x100000) - - -struct _KS_UDP_COMPLETION_CONTEXT; -struct _KS_TCP_COMPLETION_CONTEXT; - - -typedef -NTSTATUS -(*PKS_UDP_COMPLETION_ROUTINE) ( - IN PIRP Irp, - IN struct _KS_UDP_COMPLETION_CONTEXT - *UdpContext - ); - - -typedef -NTSTATUS -(*PKS_TCP_COMPLETION_ROUTINE) ( - IN PIRP Irp, - IN struct _KS_TCP_COMPLETION_CONTEXT - *TcpContext - ); - -// -// Udp Irp Completion Context -// - -typedef struct _KS_UDP_COMPLETION_CONTEXT { - - PKEVENT Event; - union { - PFILE_OBJECT AddressObject; - ksock_tconn_t * tconn; - }; - - PKS_UDP_COMPLETION_ROUTINE CompletionRoutine; - PVOID CompletionContext; - -} KS_UDP_COMPLETION_CONTEXT, *PKS_UDP_COMPLETION_CONTEXT; - - -// -// Tcp Irp Completion Context (used by tcp data recv/send) -// - -typedef struct _KS_TCP_COMPLETION_CONTEXT { - - PKEVENT Event; // Event to be waited on by Irp caller ... 
- - ksock_tconn_t * tconn; // the tdi connection - - PKS_TCP_COMPLETION_ROUTINE CompletionRoutine; - PVOID CompletionContext; - PVOID CompletionContext2; - - PKS_TSDUMGR KsTsduMgr; // Tsdu buffer manager - - // - // These tow new members are for NON_BLOCKING transmission - // - - BOOLEAN bCounted; // To indict needing refcount to - // execute CompetionRoutine - ULONG ReferCount; // Refer count of this structure - -} KS_TCP_COMPLETION_CONTEXT, *PKS_TCP_COMPLETION_CONTEXT; - -typedef KS_TCP_COMPLETION_CONTEXT ksock_tdi_tx_t, ksock_tdi_rx_t; - - -/* - * tdi extensions - */ - -#define IOCTL_TCP_QUERY_INFORMATION_EX \ - CTL_CODE(FILE_DEVICE_NETWORK, 0, METHOD_NEITHER, FILE_ANY_ACCESS) -#define IOCTL_TCP_SET_INFORMATION_EX \ - CTL_CODE(FILE_DEVICE_NETWORK, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS) - - -#define TcpBuildSetInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, Buffer, BufferLen)\ - { \ - PIO_STACK_LOCATION _IRPSP; \ - if ( CompRoutine != NULL) { \ - IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\ - } else { \ - IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \ - } \ - _IRPSP = IoGetNextIrpStackLocation (Irp); \ - _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL; \ - _IRPSP->DeviceObject = DevObj; \ - _IRPSP->FileObject = FileObj; \ - _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = 0; \ - _IRPSP->Parameters.DeviceIoControl.InputBufferLength = BufferLen; \ - _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_SET_INFORMATION_EX; \ - Irp->AssociatedIrp.SystemBuffer = Buffer; \ - } - - -#define TcpBuildQueryInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, InBuffer, InLength, OutBuffer, OutLength)\ - { \ - PIO_STACK_LOCATION _IRPSP; \ - if ( CompRoutine != NULL) { \ - IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\ - } else { \ - IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \ - } \ - _IRPSP = IoGetNextIrpStackLocation (Irp); \ - _IRPSP->MajorFunction = 
IRP_MJ_DEVICE_CONTROL; \ - _IRPSP->DeviceObject = DevObj; \ - _IRPSP->FileObject = FileObj; \ - _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = OutLength; \ - _IRPSP->Parameters.DeviceIoControl.InputBufferLength = InLength; \ - _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_QUERY_INFORMATION_EX; \ - _IRPSP->Parameters.DeviceIoControl.Type3InputBuffer = InBuffer; \ - Irp->UserBuffer = OutBuffer; \ - } - - -typedef struct ks_addr_slot { - LIST_ENTRY link; - int up; - char iface[40]; - __u32 ip_addr; - __u32 netmask; - UNICODE_STRING devname; - WCHAR buffer[1]; -} ks_addr_slot_t; - -typedef struct { - - /* - * Tdi client information - */ - - UNICODE_STRING ksnd_client_name; /* tdi client module name */ - HANDLE ksnd_pnp_handle; /* the handle for pnp changes */ - - spinlock_t ksnd_addrs_lock; /* serialize ip address list access */ - LIST_ENTRY ksnd_addrs_list; /* list of the ip addresses */ - int ksnd_naddrs; /* number of the ip addresses */ - - /* - * Tdilnd internal defintions - */ - - int ksnd_init; /* initialisation state */ - - TDI_PROVIDER_INFO ksnd_provider; /* tdi tcp/ip provider's information */ - - spinlock_t ksnd_tconn_lock; /* tdi connections access serialise */ - - int ksnd_ntconns; /* number of tconns attached in list */ - struct list_head ksnd_tconns; /* tdi connections list */ - cfs_mem_cache_t * ksnd_tconn_slab; /* slabs for ksock_tconn_t allocations */ - event_t ksnd_tconn_exit; /* exit event to be signaled by the last tconn */ - - spinlock_t ksnd_tsdu_lock; /* tsdu access serialise */ - - int ksnd_ntsdus; /* number of tsdu buffers allocated */ - ulong_ptr ksnd_tsdu_size; /* the size of a signel tsdu buffer */ - cfs_mem_cache_t * ksnd_tsdu_slab; /* slab cache for tsdu buffer allocation */ - - int ksnd_nfreetsdus; /* number of tsdu buffers in the freed list */ - struct list_head ksnd_freetsdus; /* List of the freed Tsdu buffer. 
*/ - - spinlock_t ksnd_daemon_lock; /* stabilize daemon ops */ - int ksnd_ndaemons; /* number of listening daemons */ - struct list_head ksnd_daemons; /* listening daemon list */ - event_t ksnd_daemon_exit; /* the last daemon quiting should singal it */ - -} ks_data_t; - -int -ks_init_tdi_data(); - -void -ks_fini_tdi_data(); - - -#endif /* __KERNEL__ */ -#endif /* __LIBCFS_WINNT_TCPIP_H__ */ - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/winnt/winnt-time.h b/lnet/include/libcfs/winnt/winnt-time.h deleted file mode 100644 index d31f854b9efd5e9a8186b1b956c881ddc308a949..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-time.h +++ /dev/null @@ -1,315 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for Winnt (kernel and user-level). - * - */ - -#ifndef __LIBCFS_WINNT_LINUX_TIME_H__ -#define __LIBCFS_WINNT_LINUX_TIME_H__ - -#ifndef __LIBCFS_LIBCFS_H__ -#error Do not #include this file directly. 
#include <libcfs/libcfs.h> instead -#endif - -/* Portable time API */ - -/* - * Platform provides three opaque data-types: - * - * cfs_time_t represents point in time. This is internal kernel - * time rather than "wall clock". This time bears no - * relation to gettimeofday(). - * - * cfs_duration_t represents time interval with resolution of internal - * platform clock - * - * cfs_fs_time_t represents instance in world-visible time. This is - * used in file-system time-stamps - * - * cfs_time_t cfs_time_current(void); - * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); - * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); - * int cfs_time_before (cfs_time_t, cfs_time_t); - * int cfs_time_beforeq(cfs_time_t, cfs_time_t); - * - * cfs_duration_t cfs_duration_build(int64_t); - * - * time_t cfs_duration_sec (cfs_duration_t); - * void cfs_duration_usec(cfs_duration_t, struct timeval *); - * void cfs_duration_nsec(cfs_duration_t, struct timespec *); - * - * void cfs_fs_time_current(cfs_fs_time_t *); - * time_t cfs_fs_time_sec (cfs_fs_time_t *); - * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); - * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); - * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); - * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); - * - * CFS_TIME_FORMAT - * CFS_DURATION_FORMAT - * - */ - -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION ((u_int64_t) 1000000) - -#define HZ (100) - -struct timeval { - time_t tv_sec; /* seconds */ - suseconds_t tv_usec; /* microseconds */ -}; - -struct timespec { - ulong_ptr tv_sec; - ulong_ptr tv_nsec; -}; - -#ifdef __KERNEL__ - -#include <libcfs/winnt/portals_compat25.h> - -/* - * Generic kernel stuff - */ - -typedef struct timeval cfs_fs_time_t; - -typedef u_int64_t cfs_time_t; -typedef int64_t cfs_duration_t; - -static inline void do_gettimeofday(struct timeval *tv) -{ - LARGE_INTEGER Time; - - KeQuerySystemTime(&Time); - - tv->tv_sec = (long_ptr) 
(Time.QuadPart / 10000000); - tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10; -} - -static inline cfs_time_t JIFFIES() -{ - LARGE_INTEGER Tick; - LARGE_INTEGER Elapse; - - KeQueryTickCount(&Tick); - - Elapse.QuadPart = Tick.QuadPart * KeQueryTimeIncrement(); - Elapse.QuadPart /= (10000000 / HZ); - - return Elapse.QuadPart; -} - -static inline cfs_time_t cfs_time_current(void) -{ - return JIFFIES(); -} - -static inline cfs_time_t cfs_time_current_sec(void) -{ - return (JIFFIES() / HZ); -} - -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return (t + d); -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return (t1 - t2); -} - -static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return ((int64_t)t1 - (int64_t)t2) < 0; -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return ((int64_t)t1 - (int64_t)t2) <= 0; -} - -static inline void cfs_fs_time_current(cfs_fs_time_t *t) -{ - ULONG Linux; - LARGE_INTEGER Sys; - - KeQuerySystemTime(&Sys); - - RtlTimeToSecondsSince1970(&Sys, &Linux); - - t->tv_sec = Linux; - t->tv_usec = (Sys.LowPart % 10000000) / 10; -} - -static inline cfs_time_t cfs_fs_time_sec(cfs_fs_time_t *t) -{ - return t->tv_sec; -} - -static inline u_int64_t __cfs_fs_time_flat(cfs_fs_time_t *t) -{ - return ((u_int64_t)t->tv_sec) * ONE_MILLION + t->tv_usec; -} - -static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return (__cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2)); -} - -static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) -{ - return (__cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2)); -} - -static inline cfs_duration_t cfs_time_seconds(int seconds) -{ - return (cfs_duration_t)seconds * HZ; -} - -static inline cfs_time_t cfs_duration_sec(cfs_duration_t d) -{ - return d / HZ; -} - -static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) -{ - s->tv_sec = (suseconds_t) (d / HZ); - 
s->tv_usec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) * - ONE_MILLION / HZ); -} - -static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) -{ - s->tv_sec = (suseconds_t) (d / HZ); - s->tv_nsec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) * - ONE_BILLION / HZ); -} - -static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) -{ - *v = *t; -} - -static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) -{ - s->tv_sec = t->tv_sec; - s->tv_nsec = t->tv_usec * 1000; -} - -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift -#define cfs_time_before_64 cfs_time_before - -/* - * One jiffy - */ -#define CFS_TICK (1) - -#define LTIME_S(t) (t) - -#define CFS_TIME_T "%I64u" -#define CFS_DURATION_T "%I64d" - -#else /* !__KERNEL__ */ - -/* - * Liblustre. time(2) based implementation. - */ -#include <libcfs/user-time.h> - - -// -// Time routines ... -// - -NTSYSAPI -CCHAR -NTAPI -NtQuerySystemTime( - OUT PLARGE_INTEGER CurrentTime - ); - - -NTSYSAPI -BOOLEAN -NTAPI -RtlTimeToSecondsSince1970( - IN PLARGE_INTEGER Time, - OUT PULONG ElapsedSeconds - ); - - -NTSYSAPI -VOID -NTAPI -RtlSecondsSince1970ToTime( - IN ULONG ElapsedSeconds, - OUT PLARGE_INTEGER Time - ); - -NTSYSAPI -VOID -NTAPI -Sleep( - DWORD dwMilliseconds // sleep time in milliseconds -); - - -static inline void sleep(int time) -{ - DWORD Time = 1000 * time; - Sleep(Time); -} - - -static inline void do_gettimeofday(struct timeval *tv) -{ - LARGE_INTEGER Time; - - NtQuerySystemTime(&Time); - - tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000); - tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10; -} - -static inline int gettimeofday(struct timeval *tv, void * tz) -{ - do_gettimeofday(tv); - return 0; -} - -#endif /* __KERNEL__ */ - -/* __LIBCFS_LINUX_LINUX_TIME_H__ */ -#endif -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 
80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/include/libcfs/winnt/winnt-types.h b/lnet/include/libcfs/winnt/winnt-types.h deleted file mode 100644 index 6478730fe5a95c8071b088b7a94912e41ce80ed6..0000000000000000000000000000000000000000 --- a/lnet/include/libcfs/winnt/winnt-types.h +++ /dev/null @@ -1,647 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Basic types definitions - * - */ - -#ifndef _WINNT_TYPE_H -#define _WINNT_TYPE_H - -#ifdef __KERNEL__ - -#include <ntifs.h> -#include <windef.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> - -#include <tdi.h> -#include <tdikrnl.h> -#include <tdiinfo.h> - -#else - -#include <ntddk.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <time.h> -#include <io.h> -#include <string.h> -#include <assert.h> - -#endif - - -#define __LITTLE_ENDIAN - -#define inline __inline -#define __inline__ __inline - -typedef unsigned __int8 __u8; -typedef signed __int8 __s8; - -typedef signed __int64 __s64; -typedef unsigned __int64 __u64; - -typedef signed __int16 __s16; -typedef unsigned __int16 __u16; - -typedef signed __int32 __s32; -typedef unsigned __int32 __u32; - -typedef signed __int64 __s64; -typedef unsigned __int64 __u64; - -typedef unsigned long ULONG; - - -#if defined(_WIN64) - #define long_ptr __int64 - #define ulong_ptr unsigned __int64 - #define BITS_PER_LONG (64) -#else - #define long_ptr long - #define ulong_ptr unsigned long - #define BITS_PER_LONG (32) - -#endif - -/* bsd */ -typedef unsigned char u_char; -typedef unsigned short u_short; -typedef unsigned int u_int; -typedef unsigned long u_long; - -/* sysv */ -typedef unsigned char unchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; - -#ifndef __BIT_TYPES_DEFINED__ -#define __BIT_TYPES_DEFINED__ - -typedef __u8 u_int8_t; -typedef __s8 int8_t; -typedef __u16 u_int16_t; -typedef __s16 int16_t; -typedef __u32 u_int32_t; -typedef __s32 int32_t; - -#endif /* !(__BIT_TYPES_DEFINED__) */ - -typedef __u8 uint8_t; -typedef __u16 uint16_t; -typedef __u32 uint32_t; - -typedef __u64 uint64_t; -typedef __u64 u_int64_t; -typedef __s64 int64_t; - -typedef long ssize_t; - -typedef __u32 suseconds_t; - -typedef __u32 pid_t, tid_t; - -typedef __u16 uid_t, gid_t; - -typedef __u16 mode_t; -typedef __u16 umode_t; - -typedef ulong_ptr sigset_t; 
- -typedef uint64_t loff_t; -typedef HANDLE cfs_handle_t; -typedef uint64_t cycles_t; - -#ifndef INVALID_HANDLE_VALUE -#define INVALID_HANDLE_VALUE ((HANDLE)-1) -#endif - - -#ifdef __KERNEL__ /* kernel */ - -typedef __u32 off_t; -typedef __u32 time_t; - -typedef unsigned short kdev_t; - -#else /* !__KERNEL__ */ - -typedef int BOOL; -typedef __u8 BYTE; -typedef __u16 WORD; -typedef __u32 DWORD; - -#endif /* __KERNEL__ */ - -/* - * Conastants suffix - */ - -#define ULL i64 -#define ull i64 - -/* - * Winnt kernel has no capabilities. - */ - -typedef __u32 cfs_kernel_cap_t; - -#define INT_MAX ((int)(~0U>>1)) -#define INT_MIN (-INT_MAX - 1) -#define UINT_MAX (~0U) - -#endif /* _WINNT_TYPES_H */ - - -/* - * Bytes order - */ - -// -// Byte order swapping routines -// - - -#define ___swab16(x) RtlUshortByteSwap(x) -#define ___swab32(x) RtlUlongByteSwap(x) -#define ___swab64(x) RtlUlonglongByteSwap(x) - -#define ___constant_swab16(x) \ - ((__u16)( \ - (((__u16)(x) & (__u16)0x00ffU) << 8) | \ - (((__u16)(x) & (__u16)0xff00U) >> 8) )) - -#define ___constant_swab32(x) \ - ((__u32)( \ - (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ - (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ - (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ - (((__u32)(x) & (__u32)0xff000000UL) >> 24) )) - -#define ___constant_swab64(x) \ - ((__u64)( \ - (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUL) << 56) | \ - (__u64)(((__u64)(x) & (__u64)0x000000000000ff00UL) << 40) | \ - (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000UL) << 24) | \ - (__u64)(((__u64)(x) & (__u64)0x00000000ff000000UL) << 8) | \ - (__u64)(((__u64)(x) & (__u64)0x000000ff00000000UL) >> 8) | \ - (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000UL) >> 24) | \ - (__u64)(((__u64)(x) & (__u64)0x00ff000000000000UL) >> 40) | \ - (__u64)(((__u64)(x) & (__u64)0xff00000000000000UL) >> 56) )) - - -#define __swab16(x) ___constant_swab16(x) -#define __swab32(x) ___constant_swab32(x) -#define __swab64(x) ___constant_swab64(x) - -#define __swab16s(x) do 
{ *(x) = __swab16((USHORT)(*(x)));} while(0) -#define __swab32s(x) do { *(x) = __swab32((ULONG)(*(x)));} while(0) -#define __swab64s(x) do { *(x) = __swab64((ULONGLONG)(*(x)));} while(0) - -#define __constant_htonl(x) ___constant_swab32((x)) -#define __constant_ntohl(x) ___constant_swab32((x)) -#define __constant_htons(x) ___constant_swab16((x)) -#define __constant_ntohs(x) ___constant_swab16((x)) -#define __constant_cpu_to_le64(x) ((__u64)(x)) -#define __constant_le64_to_cpu(x) ((__u64)(x)) -#define __constant_cpu_to_le32(x) ((__u32)(x)) -#define __constant_le32_to_cpu(x) ((__u32)(x)) -#define __constant_cpu_to_le16(x) ((__u16)(x)) -#define __constant_le16_to_cpu(x) ((__u16)(x)) -#define __constant_cpu_to_be64(x) ___constant_swab64((x)) -#define __constant_be64_to_cpu(x) ___constant_swab64((x)) -#define __constant_cpu_to_be32(x) ___constant_swab32((x)) -#define __constant_be32_to_cpu(x) ___constant_swab32((x)) -#define __constant_cpu_to_be16(x) ___constant_swab16((x)) -#define __constant_be16_to_cpu(x) ___constant_swab16((x)) -#define __cpu_to_le64(x) ((__u64)(x)) -#define __le64_to_cpu(x) ((__u64)(x)) -#define __cpu_to_le32(x) ((__u32)(x)) -#define __le32_to_cpu(x) ((__u32)(x)) -#define __cpu_to_le16(x) ((__u16)(x)) -#define __le16_to_cpu(x) ((__u16)(x)) -#define __cpu_to_be64(x) __swab64((x)) -#define __be64_to_cpu(x) __swab64((x)) -#define __cpu_to_be32(x) __swab32((x)) -#define __be32_to_cpu(x) __swab32((x)) -#define __cpu_to_be16(x) __swab16((x)) -#define __be16_to_cpu(x) __swab16((x)) -#define __cpu_to_le64p(x) (*(__u64*)(x)) -#define __le64_to_cpup(x) (*(__u64*)(x)) -#define __cpu_to_le32p(x) (*(__u32*)(x)) -#define __le32_to_cpup(x) (*(__u32*)(x)) -#define __cpu_to_le16p(x) (*(__u16*)(x)) -#define __le16_to_cpup(x) (*(__u16*)(x)) -#define __cpu_to_be64p(x) __swab64p((x)) -#define __be64_to_cpup(x) __swab64p((x)) -#define __cpu_to_be32p(x) __swab32p((x)) -#define __be32_to_cpup(x) __swab32p((x)) -#define __cpu_to_be16p(x) __swab16p((x)) -#define 
__be16_to_cpup(x) __swab16p((x)) -#define __cpu_to_le64s(x) do {} while (0) -#define __le64_to_cpus(x) do {} while (0) -#define __cpu_to_le32s(x) do {} while (0) -#define __le32_to_cpus(x) do {} while (0) -#define __cpu_to_le16s(x) do {} while (0) -#define __le16_to_cpus(x) do {} while (0) -#define __cpu_to_be64s(x) __swab64s((x)) -#define __be64_to_cpus(x) __swab64s((x)) -#define __cpu_to_be32s(x) __swab32s((x)) -#define __be32_to_cpus(x) __swab32s((x)) -#define __cpu_to_be16s(x) __swab16s((x)) -#define __be16_to_cpus(x) __swab16s((x)) - -#ifndef cpu_to_le64 -#define cpu_to_le64 __cpu_to_le64 -#define le64_to_cpu __le64_to_cpu -#define cpu_to_le32 __cpu_to_le32 -#define le32_to_cpu __le32_to_cpu -#define cpu_to_le16 __cpu_to_le16 -#define le16_to_cpu __le16_to_cpu -#endif - -#define cpu_to_be64 __cpu_to_be64 -#define be64_to_cpu __be64_to_cpu -#define cpu_to_be32 __cpu_to_be32 -#define be32_to_cpu __be32_to_cpu -#define cpu_to_be16 __cpu_to_be16 -#define be16_to_cpu __be16_to_cpu -#define cpu_to_le64p __cpu_to_le64p -#define le64_to_cpup __le64_to_cpup -#define cpu_to_le32p __cpu_to_le32p -#define le32_to_cpup __le32_to_cpup -#define cpu_to_le16p __cpu_to_le16p -#define le16_to_cpup __le16_to_cpup -#define cpu_to_be64p __cpu_to_be64p -#define be64_to_cpup __be64_to_cpup -#define cpu_to_be32p __cpu_to_be32p -#define be32_to_cpup __be32_to_cpup -#define cpu_to_be16p __cpu_to_be16p -#define be16_to_cpup __be16_to_cpup -#define cpu_to_le64s __cpu_to_le64s -#define le64_to_cpus __le64_to_cpus -#define cpu_to_le32s __cpu_to_le32s -#define le32_to_cpus __le32_to_cpus -#define cpu_to_le16s __cpu_to_le16s -#define le16_to_cpus __le16_to_cpus -#define cpu_to_be64s __cpu_to_be64s -#define be64_to_cpus __be64_to_cpus -#define cpu_to_be32s __cpu_to_be32s -#define be32_to_cpus __be32_to_cpus -#define cpu_to_be16s __cpu_to_be16s -#define be16_to_cpus __be16_to_cpus - - -// -// Network to host byte swap functions -// - -#define ntohl(x) ( ( ( ( x ) & 0x000000ff ) << 24 ) | \ - ( 
( ( x ) & 0x0000ff00 ) << 8 ) | \ - ( ( ( x ) & 0x00ff0000 ) >> 8 ) | \ - ( ( ( x ) & 0xff000000 ) >> 24 ) ) - -#define ntohs(x) ( ( ( ( x ) & 0xff00 ) >> 8 ) | \ - ( ( ( x ) & 0x00ff ) << 8 ) ) - - -#define htonl(x) ntohl(x) -#define htons(x) ntohs(x) - - - -#ifndef _I386_ERRNO_H -#define _I386_ERRNO_H - -#define EPERM 1 /* Operation not permitted */ -#define ENOENT 2 /* No such file or directory */ -#define ESRCH 3 /* No such process */ -#define EINTR 4 /* Interrupted system call */ -#define EIO 5 /* I/O error */ -#define ENXIO 6 /* No such device or address */ -#define E2BIG 7 /* Arg list too long */ -#define ENOEXEC 8 /* Exec format error */ -#define EBADF 9 /* Bad file number */ -#define ECHILD 10 /* No child processes */ -#define EAGAIN 11 /* Try again */ -#define ENOMEM 12 /* Out of memory */ -#define EACCES 13 /* Permission denied */ -#define EFAULT 14 /* Bad address */ -#define ENOTBLK 15 /* Block device required */ -#define EBUSY 16 /* Device or resource busy */ -#define EEXIST 17 /* File exists */ -#define EXDEV 18 /* Cross-device link */ -#define ENODEV 19 /* No such device */ -#define ENOTDIR 20 /* Not a directory */ -#define EISDIR 21 /* Is a directory */ -#define EINVAL 22 /* Invalid argument */ -#define ENFILE 23 /* File table overflow */ -#define EMFILE 24 /* Too many open files */ -#define ENOTTY 25 /* Not a typewriter */ -#define ETXTBSY 26 /* Text file busy */ -#define EFBIG 27 /* File too large */ -#define ENOSPC 28 /* No space left on device */ -#define ESPIPE 29 /* Illegal seek */ -#define EROFS 30 /* Read-only file system */ -#define EMLINK 31 /* Too many links */ -#define EPIPE 32 /* Broken pipe */ -#define EDOM 33 /* Math argument out of domain of func */ -#define ERANGE 34 /* Math result not representable */ -#undef EDEADLK -#define EDEADLK 35 /* Resource deadlock would occur */ -#undef ENAMETOOLONG -#define ENAMETOOLONG 36 /* File name too long */ -#undef ENOLCK -#define ENOLCK 37 /* No record locks available */ -#undef ENOSYS -#define 
ENOSYS 38 /* Function not implemented */ -#undef ENOTEMPTY -#define ENOTEMPTY 39 /* Directory not empty */ -#define ELOOP 40 /* Too many symbolic links encountered */ -#define EWOULDBLOCK EAGAIN /* Operation would block */ -#define ENOMSG 42 /* No message of desired type */ -#define EIDRM 43 /* Identifier removed */ -#define ECHRNG 44 /* Channel number out of range */ -#define EL2NSYNC 45 /* Level 2 not synchronized */ -#define EL3HLT 46 /* Level 3 halted */ -#define EL3RST 47 /* Level 3 reset */ -#define ELNRNG 48 /* Link number out of range */ -#define EUNATCH 49 /* Protocol driver not attached */ -#define ENOCSI 50 /* No CSI structure available */ -#define EL2HLT 51 /* Level 2 halted */ -#define EBADE 52 /* Invalid exchange */ -#define EBADR 53 /* Invalid request descriptor */ -#define EXFULL 54 /* Exchange full */ -#define ENOANO 55 /* No anode */ -#define EBADRQC 56 /* Invalid request code */ -#define EBADSLT 57 /* Invalid slot */ - -#define EDEADLOCK EDEADLK - -#define EBFONT 59 /* Bad font file format */ -#define ENOSTR 60 /* Device not a stream */ -#define ENODATA 61 /* No data available */ -#define ETIME 62 /* Timer expired */ -#define ENOSR 63 /* Out of streams resources */ -#define ENONET 64 /* Machine is not on the network */ -#define ENOPKG 65 /* Package not installed */ -#define EREMOTE 66 /* Object is remote */ -#define ENOLINK 67 /* Link has been severed */ -#define EADV 68 /* Advertise error */ -#define ESRMNT 69 /* Srmount error */ -#define ECOMM 70 /* Communication error on send */ -#define EPROTO 71 /* Protocol error */ -#define EMULTIHOP 72 /* Multihop attempted */ -#define EDOTDOT 73 /* RFS specific error */ -#define EBADMSG 74 /* Not a data message */ -#define EOVERFLOW 75 /* Value too large for defined data type */ -#define ENOTUNIQ 76 /* Name not unique on network */ -#define EBADFD 77 /* File descriptor in bad state */ -#define EREMCHG 78 /* Remote address changed */ -#define ELIBACC 79 /* Can not access a needed shared library */ -#define 
ELIBBAD 80 /* Accessing a corrupted shared library */ -#define ELIBSCN 81 /* .lib section in a.out corrupted */ -#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ -#define ELIBEXEC 83 /* Cannot exec a shared library directly */ -#undef EILSEQ -#define EILSEQ 84 /* Illegal byte sequence */ -#define ERESTART 85 /* Interrupted system call should be restarted */ -#define ESTRPIPE 86 /* Streams pipe error */ -#define EUSERS 87 /* Too many users */ -#define ENOTSOCK 88 /* Socket operation on non-socket */ -#define EDESTADDRREQ 89 /* Destination address required */ -#define EMSGSIZE 90 /* Message too long */ -#define EPROTOTYPE 91 /* Protocol wrong type for socket */ -#define ENOPROTOOPT 92 /* Protocol not available */ -#define EPROTONOSUPPORT 93 /* Protocol not supported */ -#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ -#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ -#define EPFNOSUPPORT 96 /* Protocol family not supported */ -#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ -#define EADDRINUSE 98 /* Address already in use */ -#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ -#define ENETDOWN 100 /* Network is down */ -#define ENETUNREACH 101 /* Network is unreachable */ -#define ENETRESET 102 /* Network dropped connection because of reset */ -#define ECONNABORTED 103 /* Software caused connection abort */ -#define ECONNRESET 104 /* Connection reset by peer */ -#define ENOBUFS 105 /* No buffer space available */ -#define EISCONN 106 /* Transport endpoint is already connected */ -#define ENOTCONN 107 /* Transport endpoint is not connected */ -#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ -#define ETOOMANYREFS 109 /* Too many references: cannot splice */ -#define ETIMEDOUT 110 /* Connection timed out */ -#define ECONNREFUSED 111 /* Connection refused */ -#define EHOSTDOWN 112 /* Host is down */ -#define EHOSTUNREACH 113 /* No route to host */ 
-#define EALREADY 114 /* Operation already in progress */ -#define EINPROGRESS 115 /* Operation now in progress */ -#define ESTALE 116 /* Stale NFS file handle */ -#define EUCLEAN 117 /* Structure needs cleaning */ -#define ENOTNAM 118 /* Not a XENIX named type file */ -#define ENAVAIL 119 /* No XENIX semaphores available */ -#define EISNAM 120 /* Is a named type file */ -#define EREMOTEIO 121 /* Remote I/O error */ -#define EDQUOT 122 /* Quota exceeded */ - -#define ENOMEDIUM 123 /* No medium found */ -#define EMEDIUMTYPE 124 /* Wrong medium type */ - -/* Should never be seen by user programs */ -#define ERESTARTSYS 512 -#define ERESTARTNOINTR 513 -#define ERESTARTNOHAND 514 /* restart if no handler.. */ -#define ENOIOCTLCMD 515 /* No ioctl command */ - -/* Defined for the NFSv3 protocol */ -#define EBADHANDLE 521 /* Illegal NFS file handle */ -#define ENOTSYNC 522 /* Update synchronization mismatch */ -#define EBADCOOKIE 523 /* Cookie is stale */ -#define ENOTSUPP 524 /* Operation is not supported */ -#define ETOOSMALL 525 /* Buffer or request is too small */ -#define ESERVERFAULT 526 /* An untranslatable error occurred */ -#define EBADTYPE 527 /* Type not supported by server */ -#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ - - - -/* open/fcntl - O_SYNC is only implemented on blocks devices and on files - located on an ext2 file system */ -#define O_ACCMODE 0003 -#define O_RDONLY 00 -#define O_WRONLY 01 -#define O_RDWR 02 -#define O_CREAT 0100 /* not fcntl */ -#define O_EXCL 0200 /* not fcntl */ -#define O_NOCTTY 0400 /* not fcntl */ -#define O_TRUNC 01000 /* not fcntl */ -#define O_APPEND 02000 -#define O_NONBLOCK 04000 -#define O_NDELAY O_NONBLOCK -#define O_SYNC 010000 -#define FASYNC 020000 /* fcntl, for BSD compatibility */ -#define O_DIRECT 040000 /* direct disk access hint */ -#define O_LARGEFILE 0100000 -#define O_DIRECTORY 0200000 /* must be a directory */ -#define O_NOFOLLOW 0400000 /* don't follow links */ - 
-#define F_DUPFD 0 /* dup */ -#define F_GETFD 1 /* get close_on_exec */ -#define F_SETFD 2 /* set/clear close_on_exec */ -#define F_GETFL 3 /* get file->f_flags */ -#define F_SETFL 4 /* set file->f_flags */ -#define F_GETLK 5 -#define F_SETLK 6 -#define F_SETLKW 7 - -#define F_SETOWN 8 /* for sockets. */ -#define F_GETOWN 9 /* for sockets. */ -#define F_SETSIG 10 /* for sockets. */ -#define F_GETSIG 11 /* for sockets. */ - -#define F_GETLK64 12 /* using 'struct flock64' */ -#define F_SETLK64 13 -#define F_SETLKW64 14 - -/* for F_[GET|SET]FL */ -#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ - -/* for posix fcntl() and lockf() */ -#define F_RDLCK 0 -#define F_WRLCK 1 -#define F_UNLCK 2 - -/* for old implementation of bsd flock () */ -#define F_EXLCK 4 /* or 3 */ -#define F_SHLCK 8 /* or 4 */ - -/* for leases */ -#define F_INPROGRESS 16 - -/* operations for bsd flock(), also used by the kernel implementation */ -#define LOCK_SH 1 /* shared lock */ -#define LOCK_EX 2 /* exclusive lock */ -#define LOCK_NB 4 /* or'd with one of the above to prevent - blocking */ -#define LOCK_UN 8 /* remove lock */ - -#define LOCK_MAND 32 /* This is a mandatory flock */ -#define LOCK_READ 64 /* ... Which allows concurrent read operations */ -#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ -#define LOCK_RW 192 /* ... Which allows concurrent read & write ops */ - -#endif - - -#ifndef LIBCFS_SIGNAL_H -#define LIBCFS_SIGNAL_H - -/* - * signal values ... 
- */ - -#define SIGHUP 1 -#define SIGINT 2 -#define SIGQUIT 3 -#define SIGILL 4 -#define SIGTRAP 5 -#define SIGABRT 6 -#define SIGIOT 6 -#define SIGBUS 7 -#define SIGFPE 8 -#define SIGKILL 9 -#define SIGUSR1 10 -#define SIGSEGV 11 -#define SIGUSR2 12 -#define SIGPIPE 13 -#define SIGALRM 14 -#define SIGTERM 15 -#define SIGSTKFLT 16 -#define SIGCHLD 17 -#define SIGCONT 18 -#define SIGSTOP 19 -#define SIGTSTP 20 -#define SIGTTIN 21 -#define SIGTTOU 22 -#define SIGURG 23 -#define SIGXCPU 24 -#define SIGXFSZ 25 -#define SIGVTALRM 26 -#define SIGPROF 27 -#define SIGWINCH 28 -#define SIGIO 29 -#define SIGPOLL SIGIO -/* -#define SIGLOST 29 -*/ -#define SIGPWR 30 -#define SIGSYS 31 -#define SIGUNUSED 31 - -/* These should not be considered constants from userland. */ -#define SIGRTMIN 32 -#define SIGRTMAX (_NSIG-1) - -/* - * SA_FLAGS values: - * - * SA_ONSTACK indicates that a registered stack_t will be used. - * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the - * SA_RESTART flag to get restarting signals (which were the default long ago) - * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. - * SA_RESETHAND clears the handler when the signal is delivered. - * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. - * SA_NODEFER prevents the current signal from being masked in the handler. - * - * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single - * Unix names RESETHAND and NODEFER respectively. 
- */ -#define SA_NOCLDSTOP 0x00000001 -#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ -#define SA_SIGINFO 0x00000004 -#define SA_ONSTACK 0x08000000 -#define SA_RESTART 0x10000000 -#define SA_NODEFER 0x40000000 -#define SA_RESETHAND 0x80000000 - -#define SA_NOMASK SA_NODEFER -#define SA_ONESHOT SA_RESETHAND -#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */ - -#define SA_RESTORER 0x04000000 - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 - - -#define sigmask(sig) ((__u32)1 << ((sig) - 1)) - -#endif // LIBCFS_SIGNAL_H \ No newline at end of file diff --git a/lnet/include/lnet/.cvsignore b/lnet/include/lnet/.cvsignore deleted file mode 100644 index 282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am deleted file mode 100644 index a6e51599e9ad4a6bf28ff35d2a4b9e0ff8211de3..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -lnetdir=$(includedir)/lnet - -SUBDIRS := linux -if DARWIN -SUBDIRS += darwin -endif -DIST_SUBDIRS := $(SUBDIRS) - -EXTRA_DIST = api.h api-support.h \ - lib-lnet.h lib-types.h lnet.h lnetctl.h types.h \ - socklnd.h ptllnd.h ptllnd_wire.h diff --git a/lnet/include/lnet/api-support.h b/lnet/include/lnet/api-support.h deleted file mode 100644 index 717559fd92685faca1230fa3deffbaddd646f7b4..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/api-support.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef __LNET_API_SUPPORT_H__ -#define __LNET_API_SUPPORT_H__ - -#if defined(__linux__) -#include <lnet/linux/api-support.h> -#elif defined(__APPLE__) -#include <lnet/darwin/api-support.h> -#elif defined(__WINNT__) -#include <lnet/winnt/api-support.h> -#else -#error Unsupported Operating System -#endif - -#include 
<lnet/types.h> -#include <libcfs/kp30.h> -#include <lnet/lnet.h> - -#endif diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h deleted file mode 100644 index c240aa27493b5547165751125e733ff72cb1634a..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/api.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef __LNET_API_H__ -#define __LNET_API_H__ - -#include <lnet/types.h> - -int LNetInit(void); -void LNetFini(void); - -int LNetNIInit(lnet_pid_t requested_pid); -int LNetNIFini(void); - -int LNetGetId(unsigned int index, lnet_process_id_t *id); -int LNetDist(lnet_nid_t nid, lnet_nid_t *srcnid, int *order); -int LNetCtl(unsigned int cmd, void *arg); -void LNetSnprintHandle (char *str, int str_len, lnet_handle_any_t handle); - -/* - * Portals - */ -int LNetSetLazyPortal(int portal); -int LNetClearLazyPortal(int portal); - -/* - * Match entries - */ -int LNetMEAttach(unsigned int portal, - lnet_process_id_t match_id_in, - __u64 match_bits_in, - __u64 ignore_bits_in, - lnet_unlink_t unlink_in, - lnet_ins_pos_t pos_in, - lnet_handle_me_t *handle_out); - -int LNetMEInsert(lnet_handle_me_t current_in, - lnet_process_id_t match_id_in, - __u64 match_bits_in, - __u64 ignore_bits_in, - lnet_unlink_t unlink_in, - lnet_ins_pos_t position_in, - lnet_handle_me_t *handle_out); - -int LNetMEUnlink(lnet_handle_me_t current_in); - -/* - * Memory descriptors - */ -int LNetMDAttach(lnet_handle_me_t current_in, - lnet_md_t md_in, - lnet_unlink_t unlink_in, - lnet_handle_md_t *handle_out); - -int LNetMDBind(lnet_md_t md_in, - lnet_unlink_t unlink_in, - lnet_handle_md_t *handle_out); - -int LNetMDUnlink(lnet_handle_md_t md_in); - -/* - * Event queues - */ -int LNetEQAlloc(unsigned int count_in, - lnet_eq_handler_t handler, - lnet_handle_eq_t *handle_out); - -int LNetEQFree(lnet_handle_eq_t eventq_in); - -int LNetEQGet(lnet_handle_eq_t eventq_in, - lnet_event_t *event_out); - - -int LNetEQWait(lnet_handle_eq_t eventq_in, - lnet_event_t *event_out); - -int 
LNetEQPoll(lnet_handle_eq_t *eventqs_in, - int neq_in, - int timeout_ms, - lnet_event_t *event_out, - int *which_eq_out); - -/* - * Data movement - */ -int LNetPut(lnet_nid_t self, - lnet_handle_md_t md_in, - lnet_ack_req_t ack_req_in, - lnet_process_id_t target_in, - unsigned int portal_in, - __u64 match_bits_in, - unsigned int offset_in, - __u64 hdr_data_in); - -int LNetGet(lnet_nid_t self, - lnet_handle_md_t md_in, - lnet_process_id_t target_in, - unsigned int portal_in, - __u64 match_bits_in, - unsigned int offset_in); - - -int LNetSetAsync(lnet_process_id_t id, int nasync); - -#endif diff --git a/lnet/include/lnet/darwin/.cvsignore b/lnet/include/lnet/darwin/.cvsignore deleted file mode 100644 index 282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/include/lnet/darwin/Makefile.am b/lnet/include/lnet/darwin/Makefile.am deleted file mode 100644 index 409e1593f24dea6b9689354d8d7c05ad1772302b..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST := lib-lnet.h lib-types.h lnet.h api-support.h diff --git a/lnet/include/lnet/darwin/api-support.h b/lnet/include/lnet/darwin/api-support.h deleted file mode 100644 index c411f1730701c0fb4a98bcf98f863139415bf1df..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/api-support.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __DARWIN_API_SUPPORT_H__ -#define __DARWIN_API_SUPPORT_H__ - -#ifndef __LNET_API_SUPPORT_H__ -#error Do not #include this file directly. 
#include <portals/api-support.h> instead -#endif - -#ifndef __KERNEL__ -# include <stdio.h> -# include <stdlib.h> -# include <unistd.h> -# include <time.h> - -/* Lots of POSIX dependencies to support PtlEQWait_timeout */ -# include <signal.h> -# include <setjmp.h> -# include <time.h> - -# ifdef HAVE_LIBREADLINE -# include <readline/readline.h> -typedef VFunction rl_vintfunc_t; -typedef VFunction rl_voidfunc_t; -# endif -#endif - - -#endif diff --git a/lnet/include/lnet/darwin/lib-lnet.h b/lnet/include/lnet/darwin/lib-lnet.h deleted file mode 100644 index af4bc5de9b46cca2aa47b3dda83338cb3d46e3f4..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/lib-lnet.h +++ /dev/null @@ -1,16 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_DARWIN_LIB_LNET_H__ -#define __LNET_DARWIN_LIB_LNET_H__ - -#ifndef __LNET_LIB_LNET_H__ -#error Do not #include this file directly. #include <lnet/lib-lnet.h> instead -#endif - -#include <string.h> -#include <libcfs/libcfs.h> - -#undef LNET_ROUTER - -#endif diff --git a/lnet/include/lnet/darwin/lib-types.h b/lnet/include/lnet/darwin/lib-types.h deleted file mode 100644 index f1552fb7ba690fe3d55c6d15778f22ace7dbc497..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/lib-types.h +++ /dev/null @@ -1,27 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_DARWIN_LIB_TYPES_H__ -#define __LNET_DARWIN_LIB_TYPES_H__ - -#ifndef __LNET_LIB_TYPES_H__ -#error Do not #include this file directly. #include <lnet/lib-types.h> instead -#endif - -#include <sys/types.h> -#include <libcfs/libcfs.h> -#include <libcfs/list.h> - -/* - * XXX Liang: - * - * Temporary fix, because lnet_me_free()->cfs_free->FREE() can be blocked in xnu, - * at then same time we've taken LNET_LOCK(), which is a spinlock. 
- * by using LNET_USE_LIB_FREELIST, we can avoid calling of FREE(). - * - * A better solution is moving lnet_me_free() out from LNET_LOCK, it's not hard - * but need to be very careful and take some time. - */ -#define LNET_USE_LIB_FREELIST - -#endif diff --git a/lnet/include/lnet/darwin/lnet.h b/lnet/include/lnet/darwin/lnet.h deleted file mode 100644 index 82a6127b5228ab18e3b094e8af02b4acf517c4af..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/darwin/lnet.h +++ /dev/null @@ -1,20 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_DARWIN_LNET_H__ -#define __LNET_DARWIN_LNET_H__ - -#ifndef __LNET_H__ -#error Do not #include this file directly. #include <lnet/lnet.h> instead -#endif - -/* - * lnet.h - * - * User application interface file - */ - -#include <sys/types.h> -#include <sys/uio.h> - -#endif diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h deleted file mode 100644 index 700059cdd0cb4d0f93cf260028a0fc18b2dc7ffd..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lib-lnet.h +++ /dev/null @@ -1,679 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib-lnet.h - * - * Top level include for library side routines - */ - -#ifndef __LNET_LIB_LNET_H__ -#define __LNET_LIB_LNET_H__ - -#if defined(__linux__) -#include <lnet/linux/lib-lnet.h> -#elif defined(__APPLE__) -#include <lnet/darwin/lib-lnet.h> -#elif defined(__WINNT__) -#include <lnet/winnt/lib-lnet.h> -#else -#error Unsupported Operating System -#endif - -#include <lnet/types.h> -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-types.h> - -extern lnet_t the_lnet; /* THE network */ - -static inline int lnet_is_wire_handle_none (lnet_handle_wire_t *wh) -{ - return (wh->wh_interface_cookie == LNET_WIRE_HANDLE_NONE.wh_interface_cookie && - wh->wh_object_cookie == 
LNET_WIRE_HANDLE_NONE.wh_object_cookie); -} - -static inline int lnet_md_exhausted (lnet_libmd_t *md) -{ - return (md->md_threshold == 0 || - ((md->md_options & LNET_MD_MAX_SIZE) != 0 && - md->md_offset + md->md_max_size > md->md_length)); -} - -static inline int lnet_md_unlinkable (lnet_libmd_t *md) -{ - /* Should unlink md when its refcount is 0 and either: - * - md has been flagged for deletion (by auto unlink or LNetM[DE]Unlink, - * in the latter case md may not be exhausted). - * - auto unlink is on and md is exhausted. - */ - if (md->md_refcount != 0) - return 0; - - if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) != 0) - return 1; - - return ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 && - lnet_md_exhausted(md)); -} - -#ifdef __KERNEL__ -#define LNET_LOCK() spin_lock(&the_lnet.ln_lock) -#define LNET_UNLOCK() spin_unlock(&the_lnet.ln_lock) -#define LNET_MUTEX_DOWN(m) mutex_down(m) -#define LNET_MUTEX_UP(m) mutex_up(m) -#else -# ifndef HAVE_LIBPTHREAD -#define LNET_SINGLE_THREADED_LOCK(l) \ -do { \ - LASSERT ((l) == 0); \ - (l) = 1; \ -} while (0) - -#define LNET_SINGLE_THREADED_UNLOCK(l) \ -do { \ - LASSERT ((l) == 1); \ - (l) = 0; \ -} while (0) - -#define LNET_LOCK() LNET_SINGLE_THREADED_LOCK(the_lnet.ln_lock) -#define LNET_UNLOCK() LNET_SINGLE_THREADED_UNLOCK(the_lnet.ln_lock) -#define LNET_MUTEX_DOWN(m) LNET_SINGLE_THREADED_LOCK(*(m)) -#define LNET_MUTEX_UP(m) LNET_SINGLE_THREADED_UNLOCK(*(m)) -# else -#define LNET_LOCK() pthread_mutex_lock(&the_lnet.ln_lock) -#define LNET_UNLOCK() pthread_mutex_unlock(&the_lnet.ln_lock) -#define LNET_MUTEX_DOWN(m) pthread_mutex_lock(m) -#define LNET_MUTEX_UP(m) pthread_mutex_unlock(m) -# endif -#endif - -#define MAX_PORTALS 64 - -#ifdef LNET_USE_LIB_FREELIST - -#define MAX_MES 2048 -#define MAX_MDS 2048 -#define MAX_MSGS 2048 /* Outstanding messages */ -#define MAX_EQS 512 - -static inline void * -lnet_freelist_alloc (lnet_freelist_t *fl) -{ - /* ALWAYS called with liblock held */ - lnet_freeobj_t *o; - - if (list_empty 
(&fl->fl_list)) - return (NULL); - - o = list_entry (fl->fl_list.next, lnet_freeobj_t, fo_list); - list_del (&o->fo_list); - return ((void *)&o->fo_contents); -} - -static inline void -lnet_freelist_free (lnet_freelist_t *fl, void *obj) -{ - /* ALWAYS called with liblock held */ - lnet_freeobj_t *o = list_entry (obj, lnet_freeobj_t, fo_contents); - - list_add (&o->fo_list, &fl->fl_list); -} - - -static inline lnet_eq_t * -lnet_eq_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_eq_t *eq; - - LNET_LOCK(); - eq = (lnet_eq_t *)lnet_freelist_alloc(&the_lnet.ln_free_eqs); - LNET_UNLOCK(); - - return (eq); -} - -static inline void -lnet_eq_free (lnet_eq_t *eq) -{ - /* ALWAYS called with liblock held */ - lnet_freelist_free(&the_lnet.ln_free_eqs, eq); -} - -static inline lnet_libmd_t * -lnet_md_alloc (lnet_md_t *umd) -{ - /* NEVER called with liblock held */ - lnet_libmd_t *md; - - LNET_LOCK(); - md = (lnet_libmd_t *)lnet_freelist_alloc(&the_lnet.ln_free_mds); - LNET_UNLOCK(); - - return (md); -} - -static inline void -lnet_md_free (lnet_libmd_t *md) -{ - /* ALWAYS called with liblock held */ - lnet_freelist_free (&the_lnet.ln_free_mds, md); -} - -static inline lnet_me_t * -lnet_me_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_me_t *me; - - LNET_LOCK(); - me = (lnet_me_t *)lnet_freelist_alloc(&the_lnet.ln_free_mes); - LNET_UNLOCK(); - - return (me); -} - -static inline void -lnet_me_free (lnet_me_t *me) -{ - /* ALWAYS called with liblock held */ - lnet_freelist_free (&the_lnet.ln_free_mes, me); -} - -static inline lnet_msg_t * -lnet_msg_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_msg_t *msg; - - LNET_LOCK(); - msg = (lnet_msg_t *)lnet_freelist_alloc(&the_lnet.ln_free_msgs); - LNET_UNLOCK(); - - if (msg != NULL) { - /* NULL pointers, clear flags etc */ - memset (msg, 0, sizeof (*msg)); -#ifdef CRAY_XT3 - msg->msg_ev.uid = LNET_UID_ANY; -#endif - } - return(msg); -} - -static inline void -lnet_msg_free (lnet_msg_t *msg) -{ - /* 
ALWAYS called with liblock held */ - LASSERT (!msg->msg_onactivelist); - lnet_freelist_free(&the_lnet.ln_free_msgs, msg); -} - -#else - -static inline lnet_eq_t * -lnet_eq_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_eq_t *eq; - - LIBCFS_ALLOC(eq, sizeof(*eq)); - return (eq); -} - -static inline void -lnet_eq_free (lnet_eq_t *eq) -{ - /* ALWAYS called with liblock held */ - LIBCFS_FREE(eq, sizeof(*eq)); -} - -static inline lnet_libmd_t * -lnet_md_alloc (lnet_md_t *umd) -{ - /* NEVER called with liblock held */ - lnet_libmd_t *md; - int size; - unsigned int niov; - - if ((umd->options & LNET_MD_KIOV) != 0) { - niov = umd->length; - size = offsetof(lnet_libmd_t, md_iov.kiov[niov]); - } else { - niov = ((umd->options & LNET_MD_IOVEC) != 0) ? - umd->length : 1; - size = offsetof(lnet_libmd_t, md_iov.iov[niov]); - } - - LIBCFS_ALLOC(md, size); - - if (md != NULL) { - /* Set here in case of early free */ - md->md_options = umd->options; - md->md_niov = niov; - } - - return (md); -} - -static inline void -lnet_md_free (lnet_libmd_t *md) -{ - /* ALWAYS called with liblock held */ - int size; - - if ((md->md_options & LNET_MD_KIOV) != 0) - size = offsetof(lnet_libmd_t, md_iov.kiov[md->md_niov]); - else - size = offsetof(lnet_libmd_t, md_iov.iov[md->md_niov]); - - LIBCFS_FREE(md, size); -} - -static inline lnet_me_t * -lnet_me_alloc (void) -{ - /* NEVER called with liblock held */ - lnet_me_t *me; - - LIBCFS_ALLOC(me, sizeof(*me)); - return (me); -} - -static inline void -lnet_me_free(lnet_me_t *me) -{ - /* ALWAYS called with liblock held */ - LIBCFS_FREE(me, sizeof(*me)); -} - -static inline lnet_msg_t * -lnet_msg_alloc(void) -{ - /* NEVER called with liblock held */ - lnet_msg_t *msg; - - LIBCFS_ALLOC(msg, sizeof(*msg)); - - if (msg != NULL) { - /* NULL pointers, clear flags etc */ - memset (msg, 0, sizeof (*msg)); -#ifdef CRAY_XT3 - msg->msg_ev.uid = LNET_UID_ANY; -#endif - } - return (msg); -} - -static inline void -lnet_msg_free(lnet_msg_t *msg) -{ - /* 
ALWAYS called with liblock held */ - LASSERT (!msg->msg_onactivelist); - LIBCFS_FREE(msg, sizeof(*msg)); -} -#endif - -extern lnet_libhandle_t *lnet_lookup_cookie (__u64 cookie, int type); -extern void lnet_initialise_handle (lnet_libhandle_t *lh, int type); -extern void lnet_invalidate_handle (lnet_libhandle_t *lh); - -static inline void -lnet_eq2handle (lnet_handle_eq_t *handle, lnet_eq_t *eq) -{ - if (eq == NULL) { - *handle = LNET_EQ_NONE; - return; - } - - handle->cookie = eq->eq_lh.lh_cookie; -} - -static inline lnet_eq_t * -lnet_handle2eq (lnet_handle_eq_t *handle) -{ - /* ALWAYS called with liblock held */ - lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie, - LNET_COOKIE_TYPE_EQ); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lnet_eq_t, eq_lh)); -} - -static inline void -lnet_md2handle (lnet_handle_md_t *handle, lnet_libmd_t *md) -{ - handle->cookie = md->md_lh.lh_cookie; -} - -static inline lnet_libmd_t * -lnet_handle2md (lnet_handle_md_t *handle) -{ - /* ALWAYS called with liblock held */ - lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie, - LNET_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lnet_libmd_t, md_lh)); -} - -static inline lnet_libmd_t * -lnet_wire_handle2md (lnet_handle_wire_t *wh) -{ - /* ALWAYS called with liblock held */ - lnet_libhandle_t *lh; - - if (wh->wh_interface_cookie != the_lnet.ln_interface_cookie) - return (NULL); - - lh = lnet_lookup_cookie(wh->wh_object_cookie, - LNET_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lnet_libmd_t, md_lh)); -} - -static inline void -lnet_me2handle (lnet_handle_me_t *handle, lnet_me_t *me) -{ - handle->cookie = me->me_lh.lh_cookie; -} - -static inline lnet_me_t * -lnet_handle2me (lnet_handle_me_t *handle) -{ - /* ALWAYS called with liblock held */ - lnet_libhandle_t *lh = lnet_lookup_cookie(handle->cookie, - LNET_COOKIE_TYPE_ME); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lnet_me_t, 
me_lh)); -} - -static inline void -lnet_peer_addref_locked(lnet_peer_t *lp) -{ - LASSERT (lp->lp_refcount > 0); - lp->lp_refcount++; -} - -extern void lnet_destroy_peer_locked(lnet_peer_t *lp); - -static inline void -lnet_peer_decref_locked(lnet_peer_t *lp) -{ - LASSERT (lp->lp_refcount > 0); - lp->lp_refcount--; - if (lp->lp_refcount == 0) - lnet_destroy_peer_locked(lp); -} - -static inline int -lnet_isrouter(lnet_peer_t *lp) -{ - return lp->lp_rtr_refcount != 0; -} - -static inline void -lnet_ni_addref_locked(lnet_ni_t *ni) -{ - LASSERT (ni->ni_refcount > 0); - ni->ni_refcount++; -} - -static inline void -lnet_ni_addref(lnet_ni_t *ni) -{ - LNET_LOCK(); - lnet_ni_addref_locked(ni); - LNET_UNLOCK(); -} - -static inline void -lnet_ni_decref_locked(lnet_ni_t *ni) -{ - LASSERT (ni->ni_refcount > 0); - ni->ni_refcount--; - if (ni->ni_refcount == 0) - list_add_tail(&ni->ni_list, &the_lnet.ln_zombie_nis); -} - -static inline void -lnet_ni_decref(lnet_ni_t *ni) -{ - LNET_LOCK(); - lnet_ni_decref_locked(ni); - LNET_UNLOCK(); -} - -static inline lnet_nid_t -lnet_ptlcompat_srcnid(lnet_nid_t src, lnet_nid_t dst) -{ - /* Give myself a portals srcnid if I'm sending to portals */ - if (the_lnet.ln_ptlcompat > 0 && - LNET_NIDNET(dst) == 0) - return LNET_MKNID(0, LNET_NIDADDR(src)); - - return src; -} - -static inline int -lnet_ptlcompat_matchnid(lnet_nid_t lnet_nid, lnet_nid_t ptl_nid) -{ - return ((ptl_nid == lnet_nid) || - (the_lnet.ln_ptlcompat > 0 && - LNET_NIDNET(ptl_nid) == 0 && - LNET_NETTYP(LNET_NIDNET(lnet_nid)) != LOLND && - LNET_NIDADDR(ptl_nid) == LNET_NIDADDR(lnet_nid))); -} - -static inline int -lnet_ptlcompat_matchnet(__u32 lnet_net, __u32 ptl_net) -{ - return ((ptl_net == lnet_net) || - (the_lnet.ln_ptlcompat > 0 && - ptl_net == 0 && - LNET_NETTYP(lnet_net) != LOLND)); -} - -static inline struct list_head * -lnet_nid2peerhash (lnet_nid_t nid) -{ - unsigned int idx = LNET_NIDADDR(nid) % LNET_PEER_HASHSIZE; - - return &the_lnet.ln_peer_hash[idx]; -} - -extern lnd_t 
the_lolnd; - -#ifndef __KERNEL__ -/* unconditional registration */ -#define LNET_REGISTER_ULND(lnd) \ -do { \ - extern lnd_t lnd; \ - \ - lnet_register_lnd(&(lnd)); \ -} while (0) - -/* conditional registration */ -#define LNET_REGISTER_ULND_IF_PRESENT(lnd) \ -do { \ - extern lnd_t lnd __attribute__ ((weak, alias("the_lolnd"))); \ - \ - if (&(lnd) != &the_lolnd) \ - lnet_register_lnd(&(lnd)); \ -} while (0) -#endif - -#ifdef CRAY_XT3 -inline static void -lnet_set_msg_uid(lnet_ni_t *ni, lnet_msg_t *msg, lnet_uid_t uid) -{ - LASSERT (msg->msg_ev.uid == LNET_UID_ANY); - msg->msg_ev.uid = uid; -} -#endif - -extern lnet_ni_t *lnet_nid2ni_locked (lnet_nid_t nid); -extern lnet_ni_t *lnet_net2ni_locked (__u32 net); -static inline lnet_ni_t * -lnet_net2ni (__u32 net) -{ - lnet_ni_t *ni; - - LNET_LOCK(); - ni = lnet_net2ni_locked(net); - LNET_UNLOCK(); - - return ni; -} - -int lnet_notify(lnet_ni_t *ni, lnet_nid_t peer, int alive, time_t when); -int lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway_nid); -int lnet_check_routes(void); -int lnet_del_route(__u32 net, lnet_nid_t gw_nid); -void lnet_destroy_routes(void); -int lnet_get_route(int idx, __u32 *net, __u32 *hops, - lnet_nid_t *gateway, __u32 *alive); -void lnet_proc_init(void); -void lnet_proc_fini(void); -void lnet_init_rtrpools(void); -int lnet_alloc_rtrpools(int im_a_router); -void lnet_free_rtrpools(void); -lnet_remotenet_t *lnet_find_net_locked (__u32 net); - -int lnet_islocalnid(lnet_nid_t nid); -int lnet_islocalnet(__u32 net); - -void lnet_enq_event_locked(lnet_eq_t *eq, lnet_event_t *ev); -void lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target, - unsigned int offset, unsigned int len); -int lnet_send(lnet_nid_t nid, lnet_msg_t *msg); -void lnet_return_credits_locked (lnet_msg_t *msg); -void lnet_match_blocked_msg(lnet_libmd_t *md); -int lnet_parse (lnet_ni_t *ni, lnet_hdr_t *hdr, - lnet_nid_t fromnid, void *private, int rdma_req); -void lnet_recv(lnet_ni_t *ni, void *private, 
lnet_msg_t *msg, int delayed, - unsigned int offset, unsigned int mlen, unsigned int rlen); -lnet_msg_t *lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *get_msg); -void lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *msg, unsigned int len); -void lnet_finalize(lnet_ni_t *ni, lnet_msg_t *msg, int rc); - -char *lnet_msgtyp2str (int type); -void lnet_print_hdr (lnet_hdr_t * hdr); -int lnet_fail_nid(lnet_nid_t nid, unsigned int threshold); - -unsigned int lnet_iov_nob (unsigned int niov, struct iovec *iov); -int lnet_extract_iov (int dst_niov, struct iovec *dst, - int src_niov, struct iovec *src, - unsigned int offset, unsigned int len); - -unsigned int lnet_kiov_nob (unsigned int niov, lnet_kiov_t *iov); -int lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len); - -void lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov, - unsigned int doffset, - unsigned int nsiov, struct iovec *siov, - unsigned int soffset, unsigned int nob); -void lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, - unsigned int iovoffset, - unsigned int nkiov, lnet_kiov_t *kiov, - unsigned int kiovoffset, unsigned int nob); -void lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, - unsigned int kiovoffset, - unsigned int niov, struct iovec *iov, - unsigned int iovoffset, unsigned int nob); -void lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov, - unsigned int doffset, - unsigned int nskiov, lnet_kiov_t *skiov, - unsigned int soffset, unsigned int nob); - -static inline void -lnet_copy_iov2flat(int dlen, void *dest, unsigned int doffset, - unsigned int nsiov, struct iovec *siov, unsigned int soffset, - unsigned int nob) -{ - struct iovec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen}; - - lnet_copy_iov2iov(1, &diov, doffset, - nsiov, siov, soffset, nob); -} - -static inline void -lnet_copy_kiov2flat(int dlen, void *dest, unsigned int doffset, - unsigned int nsiov, lnet_kiov_t *skiov, 
unsigned int soffset, - unsigned int nob) -{ - struct iovec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen}; - - lnet_copy_kiov2iov(1, &diov, doffset, - nsiov, skiov, soffset, nob); -} - -static inline void -lnet_copy_flat2iov(unsigned int ndiov, struct iovec *diov, unsigned int doffset, - int slen, void *src, unsigned int soffset, unsigned int nob) -{ - struct iovec siov = {/*.iov_base = */ src, /*.iov_len = */slen}; - lnet_copy_iov2iov(ndiov, diov, doffset, - 1, &siov, soffset, nob); -} - -static inline void -lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov, unsigned int doffset, - int slen, void *src, unsigned int soffset, unsigned int nob) -{ - struct iovec siov = {/* .iov_base = */ src, /* .iov_len = */ slen}; - lnet_copy_iov2kiov(ndiov, dkiov, doffset, - 1, &siov, soffset, nob); -} - -void lnet_me_unlink(lnet_me_t *me); - -void lnet_md_unlink(lnet_libmd_t *md); -void lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd); - -void lnet_register_lnd(lnd_t *lnd); -void lnet_unregister_lnd(lnd_t *lnd); -int lnet_set_ip_niaddr (lnet_ni_t *ni); - -#ifdef __KERNEL__ -int lnet_connect(cfs_socket_t **sockp, lnet_nid_t peer_nid, - __u32 local_ip, __u32 peer_ip, int peer_port); -void lnet_connect_console_error(int rc, lnet_nid_t peer_nid, - __u32 peer_ip, int port); -int lnet_count_acceptor_nis(lnet_ni_t **first_ni); -int lnet_accept(lnet_ni_t *blind_ni, cfs_socket_t *sock, __u32 magic); -int lnet_acceptor_timeout(void); -int lnet_acceptor_port(void); -#endif - -int lnet_acceptor_start(void); -void lnet_acceptor_stop(void); - -int lnet_peers_start_down(void); -int lnet_router_checker_start(void); -void lnet_router_checker_stop(void); - -int lnet_ping_target_init(void); -void lnet_ping_target_fini(void); -int lnet_ping(lnet_process_id_t id, int timeout_ms, - lnet_process_id_t *ids, int n_ids); - -int lnet_parse_ip2nets (char **networksp, char *ip2nets); -int lnet_parse_routes (char *route_str, int *im_a_router); -int lnet_parse_networks (struct list_head 
*nilist, char *networks); - -int lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid); -lnet_peer_t *lnet_find_peer_locked (lnet_nid_t nid); -void lnet_clear_peer_table(void); -void lnet_destroy_peer_table(void); -int lnet_create_peer_table(void); -void lnet_debug_peer(lnet_nid_t nid); - -#endif diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h deleted file mode 100644 index 6c6dfd341ad9310fb4bf9ca18481e65b3629f437..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lib-types.h +++ /dev/null @@ -1,552 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * p30/lib-types.h - * - * Types used by the library side routines that do not need to be - * exposed to the user application - */ - -#ifndef __LNET_LIB_TYPES_H__ -#define __LNET_LIB_TYPES_H__ - -#if defined(__linux__) -#include <lnet/linux/lib-types.h> -#elif defined(__APPLE__) -#include <lnet/darwin/lib-types.h> -#elif defined(__WINNT__) -#include <lnet/winnt/lib-types.h> -#else -#error Unsupported Operating System -#endif - -#include <libcfs/libcfs.h> -#include <libcfs/list.h> -#include <lnet/types.h> - -#define WIRE_ATTR __attribute__((packed)) - -/* The wire handle's interface cookie only matches one network interface in - * one epoch (i.e. new cookie when the interface restarts or the node - * reboots). The object cookie only matches one object on that interface - * during that object's lifetime (i.e. no cookie re-use). */ -typedef struct { - __u64 wh_interface_cookie; - __u64 wh_object_cookie; -} WIRE_ATTR lnet_handle_wire_t; - -/* byte-flip insensitive! 
*/ -#define LNET_WIRE_HANDLE_NONE \ -((const lnet_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1}) - -typedef enum { - LNET_MSG_ACK = 0, - LNET_MSG_PUT, - LNET_MSG_GET, - LNET_MSG_REPLY, - LNET_MSG_HELLO, -} lnet_msg_type_t; - -/* The variant fields of the portals message header are aligned on an 8 - * byte boundary in the message header. Note that all types used in these - * wire structs MUST be fixed size and the smaller types are placed at the - * end. */ -typedef struct lnet_ack { - lnet_handle_wire_t dst_wmd; - __u64 match_bits; - __u32 mlength; -} WIRE_ATTR lnet_ack_t; - -typedef struct lnet_put { - lnet_handle_wire_t ack_wmd; - __u64 match_bits; - __u64 hdr_data; - __u32 ptl_index; - __u32 offset; -} WIRE_ATTR lnet_put_t; - -typedef struct lnet_get { - lnet_handle_wire_t return_wmd; - __u64 match_bits; - __u32 ptl_index; - __u32 src_offset; - __u32 sink_length; -} WIRE_ATTR lnet_get_t; - -typedef struct lnet_reply { - lnet_handle_wire_t dst_wmd; -} WIRE_ATTR lnet_reply_t; - -typedef struct lnet_hello { - __u64 incarnation; - __u32 type; -} WIRE_ATTR lnet_hello_t; - -typedef struct { - lnet_nid_t dest_nid; - lnet_nid_t src_nid; - lnet_pid_t dest_pid; - lnet_pid_t src_pid; - __u32 type; /* lnet_msg_type_t */ - __u32 payload_length; /* payload data to follow */ - /*<------__u64 aligned------->*/ - union { - lnet_ack_t ack; - lnet_put_t put; - lnet_get_t get; - lnet_reply_t reply; - lnet_hello_t hello; - } msg; -} WIRE_ATTR lnet_hdr_t; - -/* A HELLO message contains a magic number and protocol version - * code in the header's dest_nid, the peer's NID in the src_nid, and - * LNET_MSG_HELLO in the type field. All other common fields are zero - * (including payload_size; i.e. no payload). - * This is for use by byte-stream LNDs (e.g. TCP/IP) to check the peer is - * running the same protocol and to find out its NID. These LNDs should - * exchange HELLO messages when a connection is first established. 
Individual - * LNDs can put whatever else they fancy in lnet_hdr_t::msg. - */ -typedef struct { - __u32 magic; /* LNET_PROTO_TCP_MAGIC */ - __u16 version_major; /* increment on incompatible change */ - __u16 version_minor; /* increment on compatible change */ -} WIRE_ATTR lnet_magicversion_t; - -/* PROTO MAGIC for LNDs */ -#define LNET_PROTO_IB_MAGIC 0x0be91b91 -#define LNET_PROTO_OPENIB_MAGIC LNET_PROTO_IB_MAGIC -#define LNET_PROTO_IIB_MAGIC LNET_PROTO_IB_MAGIC -#define LNET_PROTO_VIB_MAGIC LNET_PROTO_IB_MAGIC -#define LNET_PROTO_RA_MAGIC 0x0be91b92 -#define LNET_PROTO_QSW_MAGIC 0x0be91b93 -#define LNET_PROTO_TCP_MAGIC 0xeebc0ded -#define LNET_PROTO_PTL_MAGIC 0x50746C4E /* 'PtlN' unique magic */ -#define LNET_PROTO_GM_MAGIC 0x6d797269 /* 'myri'! */ -#define LNET_PROTO_MX_MAGIC 0x4d583130 /* 'MX10'! */ -#define LNET_PROTO_ACCEPTOR_MAGIC 0xacce7100 -#define LNET_PROTO_PING_MAGIC 0x70696E67 /* 'ping' */ - -/* Placeholder for a future "unified" protocol across all LNDs */ -/* Current LNDs that receive a request with this magic will respond with a - * "stub" reply using their current protocol */ -#define LNET_PROTO_MAGIC 0x45726963 /* ! 
*/ - - -#define LNET_PROTO_TCP_VERSION_MAJOR 1 -#define LNET_PROTO_TCP_VERSION_MINOR 0 - -/* Acceptor connection request */ -typedef struct { - __u32 acr_magic; /* PTL_ACCEPTOR_PROTO_MAGIC */ - __u32 acr_version; /* protocol version */ - __u64 acr_nid; /* target NID */ -} WIRE_ATTR lnet_acceptor_connreq_t; - -#define LNET_PROTO_ACCEPTOR_VERSION 1 - -/* forward refs */ -struct lnet_libmd; - -typedef struct lnet_msg { - struct list_head msg_activelist; - struct list_head msg_list; /* Q for credits/MD */ - - lnet_process_id_t msg_target; - __u32 msg_type; - - unsigned int msg_target_is_router:1; /* sending to a router */ - unsigned int msg_routing:1; /* being forwarded */ - unsigned int msg_ack:1; /* ack on finalize (PUT) */ - unsigned int msg_sending:1; /* outgoing message */ - unsigned int msg_receiving:1; /* being received */ - unsigned int msg_delayed:1; /* had to Q for buffer or tx credit */ - unsigned int msg_txcredit:1; /* taken an NI send credit */ - unsigned int msg_peertxcredit:1; /* taken a peer send credit */ - unsigned int msg_rtrcredit:1; /* taken a globel router credit */ - unsigned int msg_peerrtrcredit:1; /* taken a peer router credit */ - unsigned int msg_onactivelist:1; /* on the activelist */ - - struct lnet_peer *msg_txpeer; /* peer I'm sending to */ - struct lnet_peer *msg_rxpeer; /* peer I received from */ - - void *msg_private; - struct lnet_libmd *msg_md; - - unsigned int msg_len; - unsigned int msg_wanted; - unsigned int msg_offset; - unsigned int msg_niov; - struct iovec *msg_iov; - lnet_kiov_t *msg_kiov; - - lnet_event_t msg_ev; - lnet_hdr_t msg_hdr; -} lnet_msg_t; - - -typedef struct lnet_libhandle { - struct list_head lh_hash_chain; - __u64 lh_cookie; -} lnet_libhandle_t; - -#define lh_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -typedef struct lnet_eq { - struct list_head eq_list; - lnet_libhandle_t eq_lh; - lnet_seq_t eq_enq_seq; - lnet_seq_t eq_deq_seq; - unsigned int eq_size; - 
lnet_event_t *eq_events; - int eq_refcount; - lnet_eq_handler_t eq_callback; -} lnet_eq_t; - -typedef struct lnet_me { - struct list_head me_list; - lnet_libhandle_t me_lh; - lnet_process_id_t me_match_id; - unsigned int me_portal; - __u64 me_match_bits; - __u64 me_ignore_bits; - lnet_unlink_t me_unlink; - struct lnet_libmd *me_md; -} lnet_me_t; - -typedef struct lnet_libmd { - struct list_head md_list; - lnet_libhandle_t md_lh; - lnet_me_t *md_me; - char *md_start; - unsigned int md_offset; - unsigned int md_length; - unsigned int md_max_size; - int md_threshold; - int md_refcount; - unsigned int md_options; - unsigned int md_flags; - void *md_user_ptr; - lnet_eq_t *md_eq; - void *md_addrkey; - unsigned int md_niov; /* # frags */ - union { - struct iovec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; - } md_iov; -} lnet_libmd_t; - -#define LNET_MD_FLAG_ZOMBIE (1 << 0) -#define LNET_MD_FLAG_AUTO_UNLINK (1 << 1) - -#ifdef LNET_USE_LIB_FREELIST -typedef struct -{ - void *fl_objs; /* single contiguous array of objects */ - int fl_nobjs; /* the number of them */ - int fl_objsize; /* the size (including overhead) of each of them */ - struct list_head fl_list; /* where they are enqueued */ -} lnet_freelist_t; - -typedef struct -{ - struct list_head fo_list; /* enqueue on fl_list */ - void *fo_contents; /* aligned contents */ -} lnet_freeobj_t; -#endif - -typedef struct { - /* info about peers we are trying to fail */ - struct list_head tp_list; /* ln_test_peers */ - lnet_nid_t tp_nid; /* matching nid */ - unsigned int tp_threshold; /* # failures to simulate */ -} lnet_test_peer_t; - -#define LNET_COOKIE_TYPE_MD 1 -#define LNET_COOKIE_TYPE_ME 2 -#define LNET_COOKIE_TYPE_EQ 3 -#define LNET_COOKIE_TYPES 4 -/* LNET_COOKIE_TYPES must be a power of 2, so the cookie type can be - * extracted by masking with (LNET_COOKIE_TYPES - 1) */ - -struct lnet_ni; /* forward ref */ - -typedef struct lnet_lnd -{ - /* fields managed by portals */ - struct list_head lnd_list; /* stash 
in the LND table */ - int lnd_refcount; /* # active instances */ - - /* fields initialised by the LND */ - unsigned int lnd_type; - - int (*lnd_startup) (struct lnet_ni *ni); - void (*lnd_shutdown) (struct lnet_ni *ni); - int (*lnd_ctl)(struct lnet_ni *ni, unsigned int cmd, void *arg); - - /* In data movement APIs below, payload buffers are described as a set - * of 'niov' fragments which are... - * EITHER - * in virtual memory (struct iovec *iov != NULL) - * OR - * in pages (kernel only: plt_kiov_t *kiov != NULL). - * The LND may NOT overwrite these fragment descriptors. - * An 'offset' and may specify a byte offset within the set of - * fragments to start from - */ - - /* Start sending a preformatted message. 'private' is NULL for PUT and - * GET messages; otherwise this is a response to an incoming message - * and 'private' is the 'private' passed to lnet_parse(). Return - * non-zero for immediate failure, otherwise complete later with - * lnet_finalize() */ - int (*lnd_send)(struct lnet_ni *ni, void *private, lnet_msg_t *msg); - - /* Start receiving 'mlen' bytes of payload data, skipping the following - * 'rlen' - 'mlen' bytes. 'private' is the 'private' passed to - * lnet_parse(). Return non-zero for immedaite failure, otherwise - * complete later with lnet_finalize(). This also gives back a receive - * credit if the LND does flow control. */ - int (*lnd_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); - - /* lnet_parse() has had to delay processing of this message - * (e.g. waiting for a forwarding buffer or send credits). Give the - * LND a chance to free urgently needed resources. If called, return 0 - * for success and do NOT give back a receive credit; that has to wait - * until lnd_recv() gets called. On failure return < 0 and - * release resources; lnd_recv() will not be called. 
*/ - int (*lnd_eager_recv)(struct lnet_ni *ni, void *private, lnet_msg_t *msg, - void **new_privatep); - - /* notification of peer health */ - void (*lnd_notify)(struct lnet_ni *ni, lnet_nid_t peer, int alive); - -#ifdef __KERNEL__ - /* accept a new connection */ - int (*lnd_accept)(struct lnet_ni *ni, cfs_socket_t *sock); -#else - /* wait for something to happen */ - void (*lnd_wait)(struct lnet_ni *ni, int milliseconds); - - /* ensure non-RDMA messages can be received outside liblustre */ - int (*lnd_setasync)(struct lnet_ni *ni, lnet_process_id_t id, int nasync); -#endif -} lnd_t; - -#define LNET_MAX_INTERFACES 16 - -typedef struct lnet_ni { - struct list_head ni_list; /* chain on ln_nis */ - struct list_head ni_txq; /* messages waiting for tx credits */ - int ni_maxtxcredits; /* # tx credits */ - int ni_txcredits; /* # tx credits free */ - int ni_mintxcredits; /* lowest it's been */ - int ni_peertxcredits; /* # per-peer send credits */ - lnet_nid_t ni_nid; /* interface's NID */ - void *ni_data; /* instance-specific data */ - lnd_t *ni_lnd; /* procedural interface */ - int ni_refcount; /* reference count */ - char *ni_interfaces[LNET_MAX_INTERFACES]; /* equivalent interfaces to use */ -} lnet_ni_t; - -typedef struct lnet_peer { - struct list_head lp_hashlist; /* chain on peer hash */ - struct list_head lp_txq; /* messages blocking for tx credits */ - struct list_head lp_rtrq; /* messages blocking for router credits */ - struct list_head lp_rtr_list; /* chain on router list */ - int lp_txcredits; /* # tx credits available */ - int lp_mintxcredits; /* low water mark */ - int lp_rtrcredits; /* # router credits */ - int lp_minrtrcredits; /* low water mark */ - unsigned int lp_alive:1; /* alive/dead? */ - unsigned int lp_notify:1; /* notification outstanding? */ - unsigned int lp_notifylnd:1; /* outstanding notification for LND? 
*/ - unsigned int lp_notifying:1; /* some thread is handling notification */ - unsigned int lp_ping_notsent; /* SEND event outstanding from ping */ - int lp_alive_count; /* # times router went dead<->alive */ - long lp_txqnob; /* bytes queued for sending */ - time_t lp_timestamp; /* time of last aliveness news */ - time_t lp_ping_timestamp; /* time of last ping attempt */ - time_t lp_ping_deadline; /* != 0 if ping reply expected */ - lnet_ni_t *lp_ni; /* interface peer is on */ - lnet_nid_t lp_nid; /* peer's NID */ - int lp_refcount; /* # refs */ - int lp_rtr_refcount; /* # refs from lnet_route_t::lr_gateway */ -} lnet_peer_t; - -typedef struct { - struct list_head lr_list; /* chain on net */ - lnet_peer_t *lr_gateway; /* router node */ -} lnet_route_t; - -typedef struct { - struct list_head lrn_list; /* chain on ln_remote_nets */ - struct list_head lrn_routes; /* routes to me */ - __u32 lrn_net; /* my net number */ - unsigned int lrn_hops; /* how far I am */ -} lnet_remotenet_t; - -typedef struct { - struct list_head rbp_bufs; /* my free buffer pool */ - struct list_head rbp_msgs; /* messages blocking for a buffer */ - int rbp_npages; /* # pages in each buffer */ - int rbp_nbuffers; /* # buffers */ - int rbp_credits; /* # free buffers / blocked messages */ - int rbp_mincredits; /* low water mark */ -} lnet_rtrbufpool_t; - -typedef struct { - struct list_head rb_list; /* chain on rbp_bufs */ - lnet_rtrbufpool_t *rb_pool; /* owning pool */ - lnet_kiov_t rb_kiov[0]; /* the buffer space */ -} lnet_rtrbuf_t; - -typedef struct { - __u32 msgs_alloc; - __u32 msgs_max; - __u32 errors; - __u32 send_count; - __u32 recv_count; - __u32 route_count; - __u32 drop_count; - __u64 send_length; - __u64 recv_length; - __u64 route_length; - __u64 drop_length; -} lnet_counters_t; - -#define LNET_PEER_HASHSIZE 503 /* prime! 
*/ - -#define LNET_NRBPOOLS 3 /* # different router buffer pools */ - -#define LNET_PROTO_PING_MATCHBITS 0x8000000000000000LL -#define LNET_PROTO_PING_VERSION 1 -typedef struct { - __u32 pi_magic; - __u32 pi_version; - lnet_pid_t pi_pid; - __u32 pi_nnids; - lnet_nid_t pi_nid[0]; -} WIRE_ATTR lnet_ping_info_t; - -/* Options for lnet_portal_t::ptl_options */ -#define LNET_PTL_LAZY (1 << 0) -typedef struct { - struct list_head ptl_ml; /* match list */ - struct list_head ptl_msgq; /* messages blocking for MD */ - __u64 ptl_msgq_version; /* validity stamp */ - unsigned int ptl_options; -} lnet_portal_t; - -/* Router Checker */ -/* < 0 == startup error */ -#define LNET_RC_STATE_SHUTDOWN 0 /* not started */ -#define LNET_RC_STATE_RUNNING 1 /* started up OK */ -#define LNET_RC_STATE_STOPTHREAD 2 /* telling thread to stop */ -#define LNET_RC_STATE_UNLINKING 3 /* unlinking RC MD */ -#define LNET_RC_STATE_UNLINKED 4 /* RC's MD has been unlinked */ - -typedef struct -{ - /* Stuff initialised at LNetInit() */ - int ln_init; /* LNetInit() called? */ - int ln_refcount; /* LNetNIInit/LNetNIFini counter */ - int ln_niinit_self; /* Have I called LNetNIInit myself? */ - - int ln_ptlcompat; /* do I support talking to portals? 
*/ - - struct list_head ln_lnds; /* registered LNDs */ - -#ifdef __KERNEL__ - spinlock_t ln_lock; - cfs_waitq_t ln_waitq; - struct semaphore ln_api_mutex; - struct semaphore ln_lnd_mutex; -#else -# ifndef HAVE_LIBPTHREAD - int ln_lock; - int ln_api_mutex; - int ln_lnd_mutex; -# else - pthread_cond_t ln_cond; - pthread_mutex_t ln_lock; - pthread_mutex_t ln_api_mutex; - pthread_mutex_t ln_lnd_mutex; -# endif -#endif - - /* Stuff initialised at LNetNIInit() */ - - int ln_shutdown; /* shutdown in progress */ - int ln_nportals; /* # portals */ - lnet_portal_t *ln_portals; /* the vector of portals */ - - lnet_pid_t ln_pid; /* requested pid */ - - struct list_head ln_nis; /* LND instances */ - lnet_ni_t *ln_loni; /* the loopback NI */ - lnet_ni_t *ln_eqwaitni; /* NI to wait for events in */ - struct list_head ln_zombie_nis; /* dying LND instances */ - int ln_nzombie_nis; /* # of NIs to wait for */ - - struct list_head ln_remote_nets; /* remote networks with routes to them */ - __u64 ln_remote_nets_version; /* validity stamp */ - - struct list_head ln_routers; /* list of all known routers */ - __u64 ln_routers_version; /* validity stamp */ - - struct list_head *ln_peer_hash; /* NID->peer hash */ - int ln_npeers; /* # peers extant */ - int ln_peertable_version; /* /proc validity stamp */ - - int ln_routing; /* am I a router? 
*/ - lnet_rtrbufpool_t ln_rtrpools[LNET_NRBPOOLS]; /* router buffer pools */ - - int ln_lh_hash_size; /* size of lib handle hash table */ - struct list_head *ln_lh_hash_table; /* all extant lib handles, this interface */ - __u64 ln_next_object_cookie; /* cookie generator */ - __u64 ln_interface_cookie; /* uniquely identifies this ni in this epoch */ - - char *ln_network_tokens; /* space for network names */ - int ln_network_tokens_nob; - - int ln_testprotocompat; /* test protocol compatibility flags */ - - struct list_head ln_finalizeq; /* msgs waiting to complete finalizing */ -#ifdef __KERNEL__ - void **ln_finalizers; /* threads doing finalization */ - int ln_nfinalizers; /* max # threads finalizing */ -#else - int ln_finalizing; -#endif - struct list_head ln_test_peers; /* failure simulation */ - - lnet_handle_md_t ln_ping_target_md; - lnet_handle_eq_t ln_ping_target_eq; - lnet_ping_info_t *ln_ping_info; - -#ifdef __KERNEL__ - int ln_rc_state; /* router checker startup/shutdown state */ - struct semaphore ln_rc_signal; /* serialise startup/shutdown */ - lnet_handle_eq_t ln_rc_eqh; /* router checker's event queue */ -#endif - -#ifdef LNET_USE_LIB_FREELIST - lnet_freelist_t ln_free_mes; - lnet_freelist_t ln_free_msgs; - lnet_freelist_t ln_free_mds; - lnet_freelist_t ln_free_eqs; -#endif - struct list_head ln_active_msgs; - struct list_head ln_active_mds; - struct list_head ln_active_eqs; - - lnet_counters_t ln_counters; -} lnet_t; - -#endif diff --git a/lnet/include/lnet/linux/.cvsignore b/lnet/include/lnet/linux/.cvsignore deleted file mode 100644 index 3dda72986fc5af262451a760393b3a7065938c80..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile.in -Makefile diff --git a/lnet/include/lnet/linux/Makefile.am b/lnet/include/lnet/linux/Makefile.am deleted file mode 100644 index 409e1593f24dea6b9689354d8d7c05ad1772302b..0000000000000000000000000000000000000000 --- 
a/lnet/include/lnet/linux/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST := lib-lnet.h lib-types.h lnet.h api-support.h diff --git a/lnet/include/lnet/linux/api-support.h b/lnet/include/lnet/linux/api-support.h deleted file mode 100644 index bec6e34aa1e32fcb8f999cc9ced2e1a9028c9376..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/api-support.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __LINUX_API_SUPPORT_H__ -#define __LINUX_API_SUPPORT_H__ - -#ifndef __LNET_API_SUPPORT_H__ -#error Do not #include this file directly. #include <lnet /api-support.h> instead -#endif - -#ifndef __KERNEL__ -# include <stdio.h> -# include <stdlib.h> -# include <unistd.h> -# include <time.h> - -/* Lots of POSIX dependencies to support PtlEQWait_timeout */ -# include <signal.h> -# include <setjmp.h> -# include <time.h> - -#ifdef HAVE_LIBREADLINE -#define READLINE_LIBRARY -#include <readline/readline.h> - -/* readline.h pulls in a #define that conflicts with one in libcfs.h */ -#undef RETURN - -/* completion_matches() is #if 0-ed out in modern glibc */ -#ifndef completion_matches -# define completion_matches rl_completion_matches -#endif - -#endif /* HAVE_LIBREADLINE */ - -extern void using_history(void); -extern void stifle_history(int); -extern void add_history(char *); - -#endif /* !__KERNEL__ */ - -#endif diff --git a/lnet/include/lnet/linux/lib-lnet.h b/lnet/include/lnet/linux/lib-lnet.h deleted file mode 100644 index 9c38fd3ff226349807dd6d5b9552b56f85d4e71e..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/lib-lnet.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_LINUX_LIB_LNET_H__ -#define __LNET_LINUX_LIB_LNET_H__ - -#ifndef __LNET_LIB_LNET_H__ -#error Do not #include this file directly. 
#include <lnet/lib-lnet.h> instead -#endif - -#ifdef __KERNEL__ -# include <asm/page.h> -# include <linux/string.h> -# include <asm/io.h> -# include <libcfs/kp30.h> - -static inline __u64 -lnet_page2phys (struct page *p) -{ - /* compiler optimizer will elide unused branches */ - - switch (sizeof(typeof(page_to_phys(p)))) { - case 4: - /* page_to_phys returns a 32 bit physical address. This must - * be a 32 bit machine with <= 4G memory and we must ensure we - * don't sign extend when converting to 64 bits. */ - return (unsigned long)page_to_phys(p); - - case 8: - /* page_to_phys returns a 64 bit physical address :) */ - return page_to_phys(p); - - default: - LBUG(); - return 0; - } -} - -#else /* __KERNEL__ */ -# include <libcfs/list.h> -# include <string.h> -# ifdef HAVE_LIBPTHREAD -# include <pthread.h> -# endif -#endif - -#define LNET_ROUTER - -#endif /* __LNET_LINUX_LIB_LNET_H__ */ diff --git a/lnet/include/lnet/linux/lib-types.h b/lnet/include/lnet/linux/lib-types.h deleted file mode 100644 index 7d28839d97b93387d6aaa34c18ee8e598135c1bb..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/lib-types.h +++ /dev/null @@ -1,20 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_LINUX_LIB_TYPES_H__ -#define __LNET_LINUX_LIB_TYPES_H__ - -#ifndef __LNET_LIB_TYPES_H__ -#error Do not #include this file directly. 
#include <lnet/lib-types.h> instead -#endif - -#ifdef __KERNEL__ -# include <linux/uio.h> -# include <linux/smp_lock.h> -# include <linux/types.h> -#else -# define LNET_USE_LIB_FREELIST -# include <sys/types.h> -#endif - -#endif diff --git a/lnet/include/lnet/linux/lnet.h b/lnet/include/lnet/linux/lnet.h deleted file mode 100644 index b1aab840c6828622cbb2c6dbdd8c111e9d57a471..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/linux/lnet.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_LINUX_LNET_H__ -#define __LNET_LINUX_LNET_H__ - -#ifndef __LNET_H__ -#error Do not #include this file directly. #include <lnet/lnet.h> instead -#endif - -/* - * lnet.h - * - * User application interface file - */ - -#if defined (__KERNEL__) -#include <linux/uio.h> -#include <linux/types.h> -#else -#include <sys/types.h> -#include <sys/uio.h> -#endif - -#endif diff --git a/lnet/include/lnet/lnet.h b/lnet/include/lnet/lnet.h deleted file mode 100644 index 819c5241f7c926870dba28e137de36cb7efdb279..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lnet.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_H__ -#define __LNET_H__ - -/* - * lnet.h - * - * User application interface file - */ -#if defined(__linux__) -#include <lnet/linux/lnet.h> -#elif defined(__APPLE__) -#include <lnet/darwin/lnet.h> -#elif defined(__WINNT__) -#include <lnet/winnt/lnet.h> -#else -#error Unsupported Operating System -#endif - -#include <lnet/types.h> -#include <lnet/api.h> - -#endif diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h deleted file mode 100644 index cb66b9de553ad65aeac124a60ec97eaaf195c604..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/lnetctl.h +++ /dev/null @@ -1,95 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; 
indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * header for libptlctl.a - */ -#ifndef _PTLCTL_H_ -#define _PTLCTL_H_ - -#include <lnet/types.h> -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> - -#define LNET_DEV_ID 0 -#define LNET_DEV_PATH "/dev/lnet" -#define LNET_DEV_MAJOR 10 -#define LNET_DEV_MINOR 240 -#define OBD_DEV_ID 1 -#define OBD_DEV_PATH "/dev/obd" -#define OBD_DEV_MAJOR 10 -#define OBD_DEV_MINOR 241 -#define SMFS_DEV_ID 2 -#define SMFS_DEV_PATH "/dev/snapdev" -#define SMFS_DEV_MAJOR 10 -#define SMFS_DEV_MINOR 242 - -int ptl_initialize(int argc, char **argv); -int jt_ptl_network(int argc, char **argv); -int jt_ptl_list_nids(int argc, char **argv); -int jt_ptl_which_nid(int argc, char **argv); -int jt_ptl_print_interfaces(int argc, char **argv); -int jt_ptl_add_interface(int argc, char **argv); -int jt_ptl_del_interface(int argc, char **argv); -int jt_ptl_print_peers (int argc, char **argv); -int jt_ptl_add_peer (int argc, char **argv); -int jt_ptl_del_peer (int argc, char **argv); -int jt_ptl_print_connections (int argc, char **argv); -int jt_ptl_disconnect(int argc, char **argv); -int jt_ptl_push_connection(int argc, char **argv); -int jt_ptl_print_active_txs(int argc, char **argv); -int jt_ptl_ping(int argc, char **argv); 
-int jt_ptl_ping_test(int argc, char **argv); -int jt_ptl_mynid(int argc, char **argv); -int jt_ptl_add_uuid(int argc, char **argv); -int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ -int jt_ptl_close_uuid(int argc, char **argv); -int jt_ptl_del_uuid(int argc, char **argv); -int jt_ptl_add_route (int argc, char **argv); -int jt_ptl_del_route (int argc, char **argv); -int jt_ptl_notify_router (int argc, char **argv); -int jt_ptl_print_routes (int argc, char **argv); -int jt_ptl_fail_nid (int argc, char **argv); -int jt_ptl_lwt(int argc, char **argv); -int jt_ptl_testprotocompat(int argc, char **argv); -int jt_ptl_memhog(int argc, char **argv); - -int dbg_initialize(int argc, char **argv); -int jt_dbg_filter(int argc, char **argv); -int jt_dbg_show(int argc, char **argv); -int jt_dbg_list(int argc, char **argv); -int jt_dbg_debug_kernel(int argc, char **argv); -int jt_dbg_debug_daemon(int argc, char **argv); -int jt_dbg_debug_file(int argc, char **argv); -int jt_dbg_clear_debug_buf(int argc, char **argv); -int jt_dbg_mark_debug_buf(int argc, char **argv); -int jt_dbg_modules(int argc, char **argv); -int jt_dbg_panic(int argc, char **argv); - -/* l_ioctl.c */ -typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf); -void set_ioc_handler(ioc_handler_t *handler); -int register_ioc_dev(int dev_id, const char * dev_name, int major, int minor); -void unregister_ioc_dev(int dev_id); -int set_ioctl_dump(char * file); -int l_ioctl(int dev_id, unsigned int opc, void *buf); -int parse_dump(char * dump_file, ioc_handler_t ioc_func); -int jt_ioc_dump(int argc, char **argv); -extern char *dump_filename; -int dump(int dev_id, unsigned int opc, void *buf); - -#endif diff --git a/lnet/include/lnet/ptllnd.h b/lnet/include/lnet/ptllnd.h deleted file mode 100755 index 2c6263c2ac5df82795b341baa81377457d844396..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/ptllnd.h +++ /dev/null @@ -1,74 +0,0 @@ -/* -*- mode: c; c-basic-offset: 
8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -/* - * The PTLLND was designed to support Portals with - * Lustre and non-lustre UNLINK semantics. - * However for now the two targets are Cray Portals - * on the XT3 and Lustre Portals (for testing) both - * have Lustre UNLINK semantics, so this is defined - * by default. - */ -#define LUSTRE_PORTALS_UNLINK_SEMANTICS - - -#ifdef _USING_LUSTRE_PORTALS_ - -/* NIDs are 64-bits on Lustre Portals */ -#define FMT_NID LPU64 -#define FMT_PID "%d" - -/* When using Lustre Portals Lustre completion semantics are imlicit*/ -#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 - -#else /* _USING_CRAY_PORTALS_ */ - -/* NIDs are integers on Cray Portals */ -#define FMT_NID "%u" -#define FMT_PID "%d" - -/* When using Cray Portals this is defined in the Cray Portals Header*/ -/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */ - -/* Can compare handles directly on Cray Portals */ -#define PtlHandleIsEqual(a,b) ((a) == (b)) - -/* Diffrent error types on Cray Portals*/ -#define ptl_err_t ptl_ni_fail_t - -/* - * The Cray Portals has no maximum number of IOVs. The - * maximum is limited only my memory and size of the - * int parameters (2^31-1). - * Lustre only really require that the underyling - * implemenation to support at least LNET_MAX_IOV, - * so for Cray portals we can safely just use that - * value here. 
- * - */ -#define PTL_MD_MAX_IOV LNET_MAX_IOV - -#endif - -#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID - -/* Align incoming small request messages to an 8 byte boundary if this is - * supported to avoid alignment issues on some architectures */ -#ifndef PTL_MD_LOCAL_ALIGN8 -# define PTL_MD_LOCAL_ALIGN8 0 -#endif diff --git a/lnet/include/lnet/ptllnd_wire.h b/lnet/include/lnet/ptllnd_wire.h deleted file mode 100644 index ca9046c15f89e6fa4a6c794ee1ddcd2bb83dc9d6..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/ptllnd_wire.h +++ /dev/null @@ -1,102 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -/* Minimum buffer size that any peer will post to receive ptllnd messages */ -#define PTLLND_MIN_BUFFER_SIZE 256 - -/************************************************************************ - * Tunable defaults that {u,k}lnds/ptllnd should have in common. 
- */ - -#define PTLLND_PORTAL 9 /* The same portal PTLPRC used when talking to cray portals */ -#define PTLLND_PID 9 /* The Portals PID */ -#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */ - -/* Default buffer size for kernel ptllnds (guaranteed eager) */ -#define PTLLND_MAX_KLND_MSG_SIZE 512 - -/* Default buffer size for catamount ptllnds (not guaranteed eager) - large - * enough to avoid RDMA for anything sent while control is not in liblustre */ -#define PTLLND_MAX_ULND_MSG_SIZE 512 - - -/************************************************************************ - * Portals LND Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). - */ - -#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved - * above is for bulk data transfer */ -#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */ - -typedef struct -{ - lnet_hdr_t kptlim_hdr; /* portals header */ - char kptlim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kptl_immediate_msg_t; - -typedef struct -{ - lnet_hdr_t kptlrm_hdr; /* portals header */ - __u64 kptlrm_matchbits; /* matchbits */ -} WIRE_ATTR kptl_rdma_msg_t; - -typedef struct -{ - __u64 kptlhm_matchbits; /* matchbits */ - __u32 kptlhm_max_msg_size; /* max message size */ -} WIRE_ATTR kptl_hello_msg_t; - -typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ptlm_magic; /* I'm a Portals LND message */ - __u16 ptlm_version; /* this is my version number */ - __u8 ptlm_type; /* the message type */ - __u8 ptlm_credits; /* returned credits */ - __u32 ptlm_nob; /* # bytes in whole message */ - __u32 ptlm_cksum; /* checksum (0 == no checksum) */ - __u64 ptlm_srcnid; /* sender's NID */ - __u64 ptlm_srcstamp; /* sender's incarnation */ - __u64 ptlm_dstnid; /* destination's NID */ - __u64 ptlm_dststamp; /* destination's incarnation */ - __u32 ptlm_srcpid; /* sender's PID */ - __u32 ptlm_dstpid; /* destination's PID */ - - union { - kptl_immediate_msg_t immediate; - 
kptl_rdma_msg_t rdma; - kptl_hello_msg_t hello; - } WIRE_ATTR ptlm_u; - -} kptl_msg_t; - -#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC -#define PTLLND_MSG_VERSION 0x04 - -#define PTLLND_RDMA_OK 0x00 -#define PTLLND_RDMA_FAIL 0x01 - -#define PTLLND_MSG_TYPE_INVALID 0x00 -#define PTLLND_MSG_TYPE_PUT 0x01 -#define PTLLND_MSG_TYPE_GET 0x02 -#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/ -#define PTLLND_MSG_TYPE_NOOP 0x04 -#define PTLLND_MSG_TYPE_HELLO 0x05 -#define PTLLND_MSG_TYPE_NAK 0x06 - diff --git a/lnet/include/lnet/socklnd.h b/lnet/include/lnet/socklnd.h deleted file mode 100644 index 301f8a8e641b4f74f10b170c5b8c5404c2e6512e..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/socklnd.h +++ /dev/null @@ -1,53 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * <lnet/socklnd.h> - * - * #defines shared between socknal implementation and utilities - */ -#ifndef __LNET_LNET_SOCKLND_H__ -#define __LNET_LNET_SOCKLND_H__ - -#include <lnet/types.h> -#include <lnet/lib-types.h> - -#define SOCKLND_CONN_NONE (-1) -#define SOCKLND_CONN_ANY 0 -#define SOCKLND_CONN_CONTROL 1 -#define SOCKLND_CONN_BULK_IN 2 -#define SOCKLND_CONN_BULK_OUT 3 -#define SOCKLND_CONN_NTYPES 4 - -typedef struct { - __u32 kshm_magic; /* magic number of socklnd message */ - __u32 kshm_version; /* version of socklnd message */ - lnet_nid_t kshm_src_nid; /* sender's nid */ - lnet_nid_t kshm_dst_nid; /* destination nid */ - lnet_pid_t kshm_src_pid; /* sender's pid */ - lnet_pid_t kshm_dst_pid; /* destination pid */ - __u64 kshm_src_incarnation; /* sender's incarnation */ - __u64 kshm_dst_incarnation; /* destination's incarnation */ - __u32 kshm_ctype; /* connection type */ - __u32 kshm_nips; /* # IP addrs */ - __u32 kshm_ips[0]; /* IP addrs */ -} WIRE_ATTR ksock_hello_msg_t; - -typedef struct { - lnet_hdr_t ksnm_hdr; /* lnet hdr */ - char ksnm_payload[0];/* lnet payload */ -} WIRE_ATTR ksock_lnet_msg_t; - 
-typedef struct { - __u32 ksm_type; /* type of socklnd message */ - __u32 ksm_csum; /* checksum if != 0 */ - __u64 ksm_zc_req_cookie; /* ack required if != 0 */ - __u64 ksm_zc_ack_cookie; /* ack if != 0 */ - union { - ksock_lnet_msg_t lnetmsg; /* lnet message, it's empty if it's NOOP */ - } WIRE_ATTR ksm_u; -} WIRE_ATTR ksock_msg_t; - -#define KSOCK_MSG_NOOP 0xc0 /* ksm_u empty */ -#define KSOCK_MSG_LNET 0xc1 /* lnet msg */ - -#endif diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h deleted file mode 100644 index f459b1e41709b4fc92b893a57596289d216429d4..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/types.h +++ /dev/null @@ -1,165 +0,0 @@ -#ifndef __LNET_TYPES_H__ -#define __LNET_TYPES_H__ - -#include <libcfs/libcfs.h> - -#define LNET_RESERVED_PORTAL 0 /* portals reserved for lnet's own use */ - -typedef __u64 lnet_nid_t; -typedef __u32 lnet_pid_t; - -#define LNET_NID_ANY ((lnet_nid_t) -1) -#define LNET_PID_ANY ((lnet_pid_t) -1) - -#ifdef CRAY_XT3 -typedef __u32 lnet_uid_t; -#define LNET_UID_ANY ((lnet_uid_t) -1) -#endif - -#define LNET_PID_RESERVED 0xf0000000 /* reserved bits in PID */ -#define LNET_PID_USERFLAG 0x80000000 /* set in userspace peers */ - -#define LNET_TIME_FOREVER (-1) - -typedef struct { - __u64 cookie; -} lnet_handle_any_t; - -typedef lnet_handle_any_t lnet_handle_eq_t; -typedef lnet_handle_any_t lnet_handle_md_t; -typedef lnet_handle_any_t lnet_handle_me_t; - -#define LNET_INVALID_HANDLE \ - ((const lnet_handle_any_t){.cookie = -1}) -#define LNET_EQ_NONE LNET_INVALID_HANDLE - -static inline int LNetHandleIsEqual (lnet_handle_any_t h1, lnet_handle_any_t h2) -{ - return (h1.cookie == h2.cookie); -} - -typedef struct { - lnet_nid_t nid; - lnet_pid_t pid; /* node id / process id */ -} lnet_process_id_t; - -typedef enum { - LNET_RETAIN = 0, - LNET_UNLINK -} lnet_unlink_t; - -typedef enum { - LNET_INS_BEFORE, - LNET_INS_AFTER -} lnet_ins_pos_t; - -typedef struct { - void *start; - unsigned int length; - int 
threshold; - int max_size; - unsigned int options; - void *user_ptr; - lnet_handle_eq_t eq_handle; -} lnet_md_t; - -/* Max Transfer Unit (minimum supported everywhere) */ -#define LNET_MTU_BITS 20 -#define LNET_MTU (1<<LNET_MTU_BITS) - -/* limit on the number of entries in discontiguous MDs */ -#define LNET_MAX_IOV 256 - -/* Max payload size */ -#ifndef LNET_MAX_PAYLOAD -# error "LNET_MAX_PAYLOAD must be defined in config.h" -#else -# if (LNET_MAX_PAYLOAD < LNET_MTU) -# error "LNET_MAX_PAYLOAD too small - error in configure --with-max-payload-mb" -# elif defined(__KERNEL__) -# if (LNET_MAX_PAYLOAD > (PAGE_SIZE * LNET_MAX_IOV)) -/* PAGE_SIZE is a constant: check with cpp! */ -# error "LNET_MAX_PAYLOAD too large - error in configure --with-max-payload-mb" -# endif -# endif -#endif - -/* Options for the MD structure */ -#define LNET_MD_OP_PUT (1 << 0) -#define LNET_MD_OP_GET (1 << 1) -#define LNET_MD_MANAGE_REMOTE (1 << 2) -/* unused (1 << 3) */ -#define LNET_MD_TRUNCATE (1 << 4) -#define LNET_MD_ACK_DISABLE (1 << 5) -#define LNET_MD_IOVEC (1 << 6) -#define LNET_MD_MAX_SIZE (1 << 7) -#define LNET_MD_KIOV (1 << 8) - -/* For compatibility with Cray Portals */ -#define LNET_MD_PHYS 0 - -#define LNET_MD_THRESH_INF (-1) - -/* NB lustre portals uses struct iovec internally! */ -typedef struct iovec lnet_md_iovec_t; - -typedef struct { - cfs_page_t *kiov_page; - unsigned int kiov_len; - unsigned int kiov_offset; -} lnet_kiov_t; - -typedef enum { - LNET_EVENT_GET, - LNET_EVENT_PUT, - LNET_EVENT_REPLY, - LNET_EVENT_ACK, - LNET_EVENT_SEND, - LNET_EVENT_UNLINK, -} lnet_event_kind_t; - -#define LNET_SEQ_BASETYPE long -typedef unsigned LNET_SEQ_BASETYPE lnet_seq_t; -#define LNET_SEQ_GT(a,b) (((signed LNET_SEQ_BASETYPE)((a) - (b))) > 0) - -/* XXX - * cygwin need the pragma line, not clear if it's needed in other places. - * checking!!! 
- */ -#ifdef __CYGWIN__ -#pragma pack(push, 4) -#endif -typedef struct { - lnet_process_id_t target; - lnet_process_id_t initiator; - lnet_nid_t sender; - lnet_event_kind_t type; - unsigned int pt_index; - __u64 match_bits; - unsigned int rlength; - unsigned int mlength; - lnet_handle_md_t md_handle; - lnet_md_t md; - __u64 hdr_data; - int status; - int unlinked; - unsigned int offset; -#ifdef CRAY_XT3 - lnet_uid_t uid; -#endif - - volatile lnet_seq_t sequence; -} lnet_event_t; -#ifdef __CYGWIN__ -#pragma pop -#endif - -typedef enum { - LNET_ACK_REQ, - LNET_NOACK_REQ -} lnet_ack_req_t; - -typedef void (*lnet_eq_handler_t)(lnet_event_t *event); -#define LNET_EQ_HANDLER_NONE NULL - -#endif diff --git a/lnet/include/lnet/winnt/api-support.h b/lnet/include/lnet/winnt/api-support.h deleted file mode 100644 index 8806981b1a173454b2c2496281952279f9e79d61..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/winnt/api-support.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __WINNT_API_SUPPORT_H__ -#define __WINNT_API_SUPPORT_H__ - -#ifndef __LNET_API_SUPPORT_H__ -#error Do not #include this file directly. #include <lnet/api-support.h> instead -#endif - - -#endif diff --git a/lnet/include/lnet/winnt/lib-lnet.h b/lnet/include/lnet/winnt/lib-lnet.h deleted file mode 100644 index bb3e5af83089f30309af97020472eebf600a611e..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/winnt/lib-lnet.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_WINNT_LIB_LNET_H__ -#define __LNET_WINNT_LIB_LNET_H__ - -#ifndef __LNET_LIB_LNET_H__ -#error Do not #include this file directly. 
#include <lnet/lib-lnet.h> instead -#endif - -#ifdef __KERNEL__ -# include <libcfs/libcfs.h> -# include <libcfs/kp30.h> - -static inline __u64 -lnet_page2phys (struct page *p) -{ - return 0; -} - -#else /* __KERNEL__ */ - -#endif - -#endif /* __LNET_WINNT_LIB_LNET_H__ */ diff --git a/lnet/include/lnet/winnt/lib-types.h b/lnet/include/lnet/winnt/lib-types.h deleted file mode 100644 index 33a31341286598ee95e2fce7ff970a6119849639..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/winnt/lib-types.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __LNET_WINNT_LIB_TYPES_H__ -#define __LNET_WINNT_LIB_TYPES_H__ - -#ifndef __LNET_LIB_TYPES_H__ -#error Do not #include this file directly. 
#include <lnet/lib-types.h> instead -#endif - -#include <libcfs/libcfs.h> - -typedef struct { - spinlock_t lock; -} lib_ni_lock_t; - -static inline void lib_ni_lock_init(lib_ni_lock_t *l) -{ - spin_lock_init(&l->lock); -} - -static inline void lib_ni_lock_fini(lib_ni_lock_t *l) -{} - -static inline void lib_ni_lock(lib_ni_lock_t *l) -{ - int flags; - spin_lock_irqsave(&l->lock, flags); -} - -static inline void lib_ni_unlock(lib_ni_lock_t *l) -{ - spin_unlock_irqrestore(&l->lock, 0); -} - -#endif diff --git a/lnet/include/lnet/winnt/lnet.h b/lnet/include/lnet/winnt/lnet.h deleted file mode 100644 index 7a3d24db2ce8bcd1106ca8e4a15f3e05865febeb..0000000000000000000000000000000000000000 --- a/lnet/include/lnet/winnt/lnet.h +++ /dev/null @@ -1,511 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef __LNET_LINUX_LNET_H__ -#define __LNET_LINUX_LNET_H__ - -#ifndef __LNET_H__ -#error Do not #include this file directly. #include <lnet/lnet.h> instead -#endif - -#ifdef __KERNEL__ - -#include <libcfs/libcfs.h> -#include <lnet/lib-lnet.h> - -/* - * tdilnd routines - */ - - -PUCHAR -KsNtStatusToString (IN NTSTATUS Status); - - -VOID -KsPrintf( - IN LONG DebugPrintLevel, - IN PCHAR DebugMessage, - IN ... 
- ); - - -ksock_mdl_t * -ks_lock_iovs( - IN struct iovec *iov, - IN int niov, - IN int recv, - IN int * len - ); - -ksock_mdl_t * -ks_lock_kiovs( - IN lnet_kiov_t * kiov, - IN int nkiov, - IN int recv, - IN int * len - ); - -int -ks_send_mdl( - ksock_tconn_t * tconn, - void * tx, - ksock_mdl_t * mdl, - int len, - int flags - ); - -int -ks_query_data( - ksock_tconn_t * tconn, - size_t * size, - int bIsExpedited); - -int -ks_recv_mdl( - ksock_tconn_t * tconn, - ksock_mdl_t * mdl, - int size, - int flags - ); - -int -ks_get_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - PULONG Length - ); - -NTSTATUS -ks_set_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - ULONG Length - ); - -int -ks_bind_tconn ( - ksock_tconn_t * tconn, - ksock_tconn_t * parent, - ulong_ptr addr, - unsigned short port - ); - -int -ks_build_tconn( - ksock_tconn_t * tconn, - ulong_ptr addr, - unsigned short port - ); - -int -ks_disconnect_tconn( - ksock_tconn_t * tconn, - ulong_ptr flags - ); - -void -ks_abort_tconn( - ksock_tconn_t * tconn - ); - -int -ks_query_local_ipaddr( - ksock_tconn_t * tconn - ); - -int -ks_tconn_write (ksock_tconn_t *tconn, void *buffer, int nob); - -int -ks_tconn_read (ksock_tconn_t * tconn, void *buffer, int nob); - -NTSTATUS -KsTcpCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ); - -NTSTATUS -KsDisconectCompletionRoutine ( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ); - -NTSTATUS -KsTcpReceiveCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ); - -NTSTATUS -KsTcpSendCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ); - -NTSTATUS -KsAcceptCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ); - - -NTSTATUS -KsConnectEventHandler( - IN PVOID TdiEventContext, - IN LONG RemoteAddressLength, - IN PVOID RemoteAddress, - IN LONG UserDataLength, - IN PVOID UserData, - IN LONG 
OptionsLength, - IN PVOID Options, - OUT CONNECTION_CONTEXT * ConnectionContext, - OUT PIRP * AcceptIrp - ); - -NTSTATUS -KsDisconnectEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN LONG DisconnectDataLength, - IN PVOID DisconnectData, - IN LONG DisconnectInformationLength, - IN PVOID DisconnectInformation, - IN ULONG DisconnectFlags - ); - -NTSTATUS -KsTcpReceiveEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ); - -NTSTATUS -KsTcpReceiveExpeditedEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ); - -NTSTATUS -KsTcpChainedReceiveEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ); - -NTSTATUS -KsTcpChainedReceiveExpeditedEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ); - - - -VOID -KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem); - - -ULONG -ks_tdi_send_flags(ULONG SockFlags); - -PIRP -KsBuildTdiIrp( - IN PDEVICE_OBJECT DeviceObject - ); - -NTSTATUS -KsSubmitTdiIrp( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN BOOLEAN bSynchronous, - OUT PULONG Information - ); - -NTSTATUS 
-KsOpenControl( - IN PUNICODE_STRING DeviceName, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ); - -NTSTATUS -KsCloseControl( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ); - -NTSTATUS -KsOpenAddress( - IN PUNICODE_STRING DeviceName, - IN PTRANSPORT_ADDRESS pAddress, - IN ULONG AddressLength, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ); - -NTSTATUS -KsCloseAddress( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ); - -NTSTATUS -KsOpenConnection( - IN PUNICODE_STRING DeviceName, - IN CONNECTION_CONTEXT ConnectionContext, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ); - -NTSTATUS -KsCloseConnection( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ); - -NTSTATUS -KsAssociateAddress( - IN HANDLE AddressHandle, - IN PFILE_OBJECT ConnectionObject - ); - - -NTSTATUS -KsDisassociateAddress( - IN PFILE_OBJECT ConnectionObject - ); - - -NTSTATUS -KsSetEventHandlers( - IN PFILE_OBJECT AddressObject, - IN PVOID EventContext, - IN PKS_EVENT_HANDLERS Handlers - ); - - -NTSTATUS -KsQueryProviderInfo( - PWSTR TdiDeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ); - -NTSTATUS -KsQueryAddressInfo( - IN PFILE_OBJECT FileObject, - OUT PTDI_ADDRESS_INFO AddressInfo, - OUT PULONG AddressSize - ); - -NTSTATUS -KsQueryConnectionInfo( - IN PFILE_OBJECT ConnectionObject, - OUT PTDI_CONNECTION_INFO ConnectionInfo, - OUT PULONG ConnectionSize - ); - -ULONG -KsInitializeTdiAddress( - IN OUT PTA_IP_ADDRESS pTransportAddress, - IN ULONG IpAddress, - IN USHORT IpPort - ); - -ULONG -KsQueryMdlsSize (IN PMDL Mdl); - - -ULONG -KsQueryTdiAddressLength( - OUT PTRANSPORT_ADDRESS pTransportAddress - ); - -NTSTATUS -KsQueryIpAddress( - IN PFILE_OBJECT FileObject, - OUT PVOID TdiAddress, - OUT ULONG* AddressLength - ); - - -NTSTATUS -KsErrorEventHandler( - IN PVOID TdiEventContext, - IN NTSTATUS Status - ); - -int -ks_set_handlers( - ksock_tconn_t * tconn - ); - - -VOID -KsPrintProviderInfo( - PWSTR DeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ); - 
-ksock_tconn_t * -ks_create_tconn(); - -void -ks_free_tconn( - ksock_tconn_t * tconn - ); - -void -ks_init_listener( - ksock_tconn_t * tconn - ); - -void -ks_init_sender( - ksock_tconn_t * tconn - ); - -void -ks_init_child( - ksock_tconn_t * tconn - ); - -void -ks_get_tconn( - ksock_tconn_t * tconn - ); - -void -ks_put_tconn( - ksock_tconn_t * tconn - ); - -int -ks_reset_handlers( - ksock_tconn_t * tconn - ); - -void -ks_destroy_tconn( - ksock_tconn_t * tconn - ); - - -PKS_TSDU -KsAllocateKsTsdu(); - -VOID -KsPutKsTsdu( - PKS_TSDU KsTsdu - ); - -VOID -KsFreeKsTsdu( - PKS_TSDU KsTsdu - ); - -VOID -KsInitializeKsTsdu( - PKS_TSDU KsTsdu, - ULONG Length - ); - - -VOID -KsInitializeKsTsduMgr( - PKS_TSDUMGR TsduMgr - ); - -VOID -KsInitializeKsChain( - PKS_CHAIN KsChain - ); - -NTSTATUS -KsCleanupTsduMgr( - PKS_TSDUMGR KsTsduMgr - ); - -NTSTATUS -KsCleanupKsChain( - PKS_CHAIN KsChain - ); - -NTSTATUS -KsCleanupTsdu( - ksock_tconn_t * tconn - ); - -NTSTATUS -KsCopyMdlChainToMdlChain( - IN PMDL SourceMdlChain, - IN ULONG SourceOffset, - IN PMDL DestinationMdlChain, - IN ULONG DestinationOffset, - IN ULONG BytesTobecopied, - OUT PULONG BytesCopied - ); - -ULONG -KsQueryMdlsSize (PMDL Mdl); - -NTSTATUS -KsLockUserBuffer ( - IN PVOID UserBuffer, - IN BOOLEAN bPaged, - IN ULONG Length, - IN LOCK_OPERATION Operation, - OUT PMDL * pMdl - ); - -PVOID -KsMapMdlBuffer (PMDL Mdl); - -VOID -KsReleaseMdl ( IN PMDL Mdl, - IN int Paged ); - -int -ks_lock_buffer ( - void * buffer, - int paged, - int length, - LOCK_OPERATION access, - ksock_mdl_t ** kmdl - ); - -void * -ks_map_mdl (ksock_mdl_t * mdl); - -void -ks_release_mdl (ksock_mdl_t *mdl, int paged); - -#endif /* __KERNEL__ */ - -#endif diff --git a/lnet/klnds/.cvsignore b/lnet/klnds/.cvsignore deleted file mode 100644 index f5fd0b02c2417a69331ad6a19e2e5d033590cc47..0000000000000000000000000000000000000000 --- a/lnet/klnds/.cvsignore +++ /dev/null @@ -1,5 +0,0 @@ -Makefile -autoMakefile -autoMakefile.in -.*.cmd -.depend diff --git 
a/lnet/klnds/Makefile.in b/lnet/klnds/Makefile.in deleted file mode 100644 index d4e034cd46164cbba9c9904c2ead1dbd0e813c59..0000000000000000000000000000000000000000 --- a/lnet/klnds/Makefile.in +++ /dev/null @@ -1,13 +0,0 @@ -@BUILD_GMLND_TRUE@subdir-m += gmlnd -@BUILD_MXLND_TRUE@subdir-m += mxlnd -@BUILD_RALND_TRUE@subdir-m += ralnd -@BUILD_O2IBLND_TRUE@subdir-m += o2iblnd -@BUILD_OPENIBLND_TRUE@subdir-m += openiblnd -@BUILD_CIBLND_TRUE@subdir-m += ciblnd -@BUILD_IIBLND_TRUE@subdir-m += iiblnd -@BUILD_VIBLND_TRUE@subdir-m += viblnd -@BUILD_QSWLND_TRUE@subdir-m += qswlnd -@BUILD_PTLLND_TRUE@subdir-m += ptllnd -subdir-m += socklnd - -@INCLUDE_RULES@ diff --git a/lnet/klnds/autoMakefile.am b/lnet/klnds/autoMakefile.am deleted file mode 100644 index e6d0146e6527ad1da7da2785c0b193e7dffd9377..0000000000000000000000000000000000000000 --- a/lnet/klnds/autoMakefile.am +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -SUBDIRS = socklnd qswlnd gmlnd mxlnd openiblnd iiblnd viblnd ralnd ptllnd ciblnd o2iblnd diff --git a/lnet/klnds/ciblnd/.cvsignore b/lnet/klnds/ciblnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/ciblnd/Makefile.in b/lnet/klnds/ciblnd/Makefile.in deleted file mode 100644 index 55311ad06da9fffb43246e5996d96c518693f6e2..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/Makefile.in +++ /dev/null @@ -1,8 +0,0 @@ -MODULES := kciblnd -kciblnd-objs := ciblnd.o ciblnd_cb.o ciblnd_modparams.o - -default: all - -EXTRA_POST_CFLAGS := @CIBCPPFLAGS@ -I@LUSTRE@/../lnet/klnds/openiblnd - -@INCLUDE_RULES@ diff --git a/lnet/klnds/ciblnd/autoMakefile.am b/lnet/klnds/ciblnd/autoMakefile.am deleted file mode 100644 index cae5cfc66c8bc6ce4c174455b7369ba12ec29f11..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/autoMakefile.am +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_CIBLND -modulenet_DATA = kciblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kciblnd-objs:%.o=%.c) - diff --git a/lnet/klnds/ciblnd/ciblnd.c b/lnet/klnds/ciblnd/ciblnd.c deleted file mode 100644 index e13948441b6c13ef4848bfa87c9731d72b4155cc..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/ciblnd.c +++ /dev/null @@ -1 +0,0 @@ -#include "openiblnd.c" diff --git a/lnet/klnds/ciblnd/ciblnd_cb.c b/lnet/klnds/ciblnd/ciblnd_cb.c deleted file mode 100644 index 893e16d655aed13d3cff9eaf79822b0f4689ffe7..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/ciblnd_cb.c +++ /dev/null @@ -1 +0,0 @@ -#include "openiblnd_cb.c" diff --git a/lnet/klnds/ciblnd/ciblnd_modparams.c b/lnet/klnds/ciblnd/ciblnd_modparams.c deleted file mode 100644 index a0c6b1fab7322550e2f928d752ba43c47214a55a..0000000000000000000000000000000000000000 --- a/lnet/klnds/ciblnd/ciblnd_modparams.c +++ /dev/null @@ -1 +0,0 @@ -#include "openiblnd_modparams.c" diff --git a/lnet/klnds/gmlnd/.cvsignore b/lnet/klnds/gmlnd/.cvsignore deleted file mode 100644 index 642e2e6cc0e58fd056bd3c99bd1fa72521b9e8b7..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.cmd -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/gmlnd/Makefile.in b/lnet/klnds/gmlnd/Makefile.in deleted file mode 100644 index 1aec50d5f08f281a6da9941e19c0df0474859767..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kgmlnd -kgmlnd-objs := gmlnd_api.o gmlnd_cb.o gmlnd_comm.o gmlnd_utils.o gmlnd_module.o - -EXTRA_PRE_CFLAGS := @GMCPPFLAGS@ -DGM_KERNEL - -@INCLUDE_RULES@ diff --git a/lnet/klnds/gmlnd/README b/lnet/klnds/gmlnd/README deleted file mode 100644 index 
ac2e23ddb7c8b1671b60ea3c84eb26c5deebaa4e..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/README +++ /dev/null @@ -1,73 +0,0 @@ -1. This version of the GM nal requires an unreleased extension to the GM API to - map physical memory: gm_register_memory_ex_phys(). This allows it to avoid - ENOMEM problems associated with large contiguous buffer allocation. - -2. ./configure --with-gm=<path-to-gm-source-tree> \ - [--with-gm-install=<path-to-gm-installation>] - - If the sources do not support gm_register_memory_ex_phys(), configure flags - an error. In this case you should apply the patch and rebuild and re-install - GM as directed in the error message. - - By default GM is installed in /opt/gm. If an alternate path was specified to - <GM-sources>/binary/GM_INSTALL, you should also specify --with-gm-install - with the same path. - -3. The GM timeout is 300 seconds; i.e. the network may not release resources - claimed by communications stalled with a crashing node for this time. - Default gmnal buffer tuning parameters (see (4) below) have been chosen to - minimize this problem and prevent lustre having to block for resources. - However in some situations, where all network buffers are busy, the default - lustre timeout (various, scaled from the base timeout of 100 seconds) may be - too small and the only solution may be to increase the lustre timeout - dramatically. - -4. The gmnal has the following module parameters... - - gmnal_port The GM port that the NAL will use (default 4) - Change this if it conflicts with site usage. - - gmnal_ntx The number of "normal" transmit descriptors (default - 32). When this pool is exhausted, threads sending - and receiving on the network block until in-progress - transmits have completed. Each descriptor consumes 1 - GM_MTU sized buffer. - - gmnal_ntx_nblk The number of "reserved" transmit descriptors - (default 256). This pool is reserved for responses to - incoming communications that may not block. 
Increase - only if console error messages indicates the pool - has been exhausted (LustreError: Can't get tx for - msg type...) Each descriptor consumes 1 GM_MTU sized - buffer. - - gmnal_nlarge_tx_bufs The number of 1MByte transmit buffers to reserve at - startup (default 32). This controls the number of - concurrent sends larger that GM_MTU. It can be - reduced to conserve memory, or increased to increase - large message sending concurrency. - - gmnal_nrx_small The number of GM_MTU sized receive buffers posted to - receive from the network (default 128). Increase if - congestion is suspected, however note that the total - number of receives that can be posted at any time is - limited by the number of GM receive tokens - available. If there are too few, this, and - gmnal_nrx_large are scaled back accordingly. - - gmnal_nrx_large The number of 1MByte receive buffers posted to - receive from the network (default 64). Increase if - the number of OST threads is increased. But note - that the total number of receives that can be posted - at any time is limited by the number of GM receive - tokens available. If there are too few, this, and - gmnal_nrx_small are scaled back accordingly. - -5. Network configuration for GM is done in an lmc script as follows... - - GM2NID=${path-to-lustre-tree}/portals/utils/gmnalnid - - ${LMC} --node some_server --add net --nettype gm --nid `$GM2NID -n some_server` - - ${LMC} --node client --add net --nettype gm --nid '*' - diff --git a/lnet/klnds/gmlnd/autoMakefile.am b/lnet/klnds/gmlnd/autoMakefile.am deleted file mode 100644 index 6ff7933633ee7a00f2a5f252ec0c0c7ba48f31fb..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_GMLND -modulenet_DATA = kgmlnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kgmlnd-objs:%.o=%.c) gmlnd.h diff --git a/lnet/klnds/gmlnd/gm-reg-phys.patch b/lnet/klnds/gmlnd/gm-reg-phys.patch deleted file mode 100644 index df32a219361c34d3ecbe8126e962c63465595057..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gm-reg-phys.patch +++ /dev/null @@ -1,107 +0,0 @@ -Index: libgm/gm_register.c -=================================================================== -RCS file: /repository/gm/libgm/gm_register.c,v -retrieving revision 1.9.16.3 -diff -u -r1.9.16.3 gm_register.c ---- libgm/gm_register.c 9 Aug 2005 14:37:02 -0000 1.9.16.3 -+++ libgm/gm_register.c 25 Aug 2005 21:35:58 -0000 -@@ -77,20 +77,14 @@ - - */ - --GM_ENTRY_POINT --gm_status_t --gm_register_memory_ex (gm_port_t *p, void *_ptr, gm_size_t length, void *_pvma) -+static gm_status_t -+_gm_register_memory (gm_port_t *p, int is_physical, gm_u64_t ptr, gm_size_t length, gm_up_t pvma) - { - gm_status_t status; -- gm_up_t ptr; -- gm_up_t pvma; - - GM_CALLED_WITH_ARGS (("%p,%p,"GM_U64_TMPL",%p", - p, _ptr, GM_U64_ARG (length), _pvma)); - -- ptr = GM_PTR_TO_UP (_ptr); -- pvma = GM_PTR_TO_UP (_pvma); -- - #if !GM_KERNEL && !GM_CAN_REGISTER_MEMORY - GM_PARAMETER_MAY_BE_UNUSED (p); - GM_PARAMETER_MAY_BE_UNUSED (ptr); -@@ -160,7 +154,7 @@ - status = gm_add_mapping_to_page_table (ps, - ptr + offset, - pvma + offset, -- GM_INVALID_DMA_PAGE); -+ is_physical ? 
ptr + offset : GM_INVALID_DMA_PAGE); - if (status != GM_SUCCESS) - { - status = GM_INVALID_PARAMETER; -@@ -317,13 +311,31 @@ - - */ - -+#if GM_KERNEL && (GM_CPU_x86 || GM_CPU_x86_64 || GM_CPU_ia64) -+/* only architecture where pci bus addr == physical address can use -+ such a simple scheme */ -+GM_ENTRY_POINT gm_status_t -+gm_register_memory_ex_phys (struct gm_port *p, -+ gm_u64_t phys, gm_size_t length, -+ gm_up_t pvma) -+{ -+ return _gm_register_memory(p, 1, phys, length, (gm_size_t)pvma); -+} -+#endif -+ -+GM_ENTRY_POINT gm_status_t -+gm_register_memory_ex (gm_port_t *p, void *ptr, gm_size_t length, void *pvma) -+{ -+ return _gm_register_memory(p, 0, (gm_size_t)ptr, length, (gm_size_t)pvma); -+} -+ - GM_ENTRY_POINT gm_status_t - gm_register_memory (gm_port_t *p, void *ptr, gm_size_t length) - { - gm_status_t status; - - GM_CALLED_WITH_ARGS (("%p,%p,"GM_U64_TMPL, p, ptr, GM_U64_ARG (length))); -- status = gm_register_memory_ex (p, ptr, length, ptr); -+ status = _gm_register_memory(p, 0, (gm_size_t)ptr, length, (gm_size_t)ptr); - GM_RETURN_STATUS (status); - } - -Index: include/gm.h -=================================================================== -RCS file: /repository/gm/include/gm.h,v -retrieving revision 1.25.10.11 -diff -u -r1.25.10.11 gm.h ---- include/gm.h 14 Mar 2005 21:42:41 -0000 1.25.10.11 -+++ include/gm.h 25 Aug 2005 21:35:58 -0000 -@@ -2676,6 +2676,10 @@ - GM_ENTRY_POINT gm_status_t gm_register_memory_ex (struct gm_port *p, - void *ptr, gm_size_t length, - void *pvma); -+ -+GM_ENTRY_POINT gm_status_t gm_register_memory_ex_phys (struct gm_port *p, -+ gm_u64_t phys, gm_size_t length, -+ gm_up_t pvma); - #endif /* GM_API_VERSION >= GM_API_VERSION_2_0_6 */ - - #if GM_API_VERSION >= GM_API_VERSION_2_1_0 -Index: libgm/gm_reference_api.c -=================================================================== -RCS file: /repository/gm/libgm/gm_reference_api.c,v -retrieving revision 1.3.14.1 -diff -u -r1.3.14.1 gm_reference_api.c ---- 
libgm/gm_reference_api.c 23 Apr 2004 20:27:29 -0000 1.3.14.1 -+++ libgm/gm_reference_api.c 25 Aug 2005 22:39:20 -0000 -@@ -154,6 +154,9 @@ - GM_REF (gm_register_buffer); - GM_REF (gm_register_memory); - GM_REF (gm_register_memory_ex); -+#if GM_KERNEL && (GM_CPU_x86 || GM_CPU_x86_64 || GM_CPU_ia64) -+GM_REF (gm_register_memory_ex_phys); -+#endif - GM_REF (gm_resume_sending); - GM_REF (gm_send); - GM_REF (gm_send_to_peer); diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h deleted file mode 100644 index 6936737557e342e363d073dd00c3fb2ac8651eae..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd.h +++ /dev/null @@ -1,245 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - - -/* - * Portals GM kernel NAL header file - * This file makes all declaration and prototypes - * for the API side and CB side of the NAL - */ -#ifndef __INCLUDE_GMNAL_H__ -#define __INCLUDE_GMNAL_H__ - -/* XXX Lustre as of V1.2.2 drop defines VERSION, which causes problems - * when including <GM>/include/gm_lanai.h which defines a structure field - * with the name VERSION XXX */ -#ifdef VERSION -# undef VERSION -#endif - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include "linux/config.h" -#include "linux/module.h" -#include "linux/tty.h" -#include "linux/kernel.h" -#include "linux/mm.h" -#include "linux/string.h" -#include "linux/stat.h" -#include "linux/errno.h" -#include "linux/version.h" -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) -#include "linux/buffer_head.h" -#include "linux/fs.h" -#else -#include "linux/locks.h" -#endif -#include "linux/unistd.h" -#include "linux/init.h" -#include "linux/sem.h" -#include "linux/vmalloc.h" -#include "linux/sysctl.h" - -#define DEBUG_SUBSYSTEM S_LND - -#include "libcfs/kp30.h" -#include "lnet/lnet.h" -#include "lnet/lib-lnet.h" - -/* undefine these before including the GM headers which clash */ -#undef PACKAGE_BUGREPORT -#undef PACKAGE_NAME -#undef PACKAGE_STRING -#undef PACKAGE_TARNAME -#undef PACKAGE_VERSION - -#define GM_STRONG_TYPES 1 -#ifdef VERSION -#undef VERSION -#endif -#include "gm.h" -#include "gm_internal.h" - -/* Fixed tunables */ -#define GMNAL_RESCHED 100 /* # busy loops to force scheduler to yield */ -#define GMNAL_NETADDR_BASE 0x10000000 /* where we start in network VM */ -#define GMNAL_LARGE_PRIORITY GM_LOW_PRIORITY /* large message GM priority */ -#define GMNAL_SMALL_PRIORITY GM_LOW_PRIORITY /* small message GM priority */ - -/* Wire protocol */ -typedef struct { - lnet_hdr_t gmim_hdr; /* portals header */ - char gmim_payload[0]; /* payload */ -} gmnal_immediate_msg_t; - -typedef struct { - /* First 2 fields fixed FOR ALL TIME */ - __u32 gmm_magic; /* I'm a GM message */ - 
__u16 gmm_version; /* this is my version number */ - - __u16 gmm_type; /* msg type */ - __u64 gmm_srcnid; /* sender's NID */ - __u64 gmm_dstnid; /* destination's NID */ - union { - gmnal_immediate_msg_t immediate; - } gmm_u; -} WIRE_ATTR gmnal_msg_t; - -#define GMNAL_MSG_MAGIC LNET_PROTO_GM_MAGIC -#define GMNAL_MSG_VERSION 1 -#define GMNAL_MSG_IMMEDIATE 1 - -typedef struct netbuf { - __u64 nb_netaddr; /* network VM address */ - lnet_kiov_t nb_kiov[1]; /* the pages (at least 1) */ -} gmnal_netbuf_t; - -#define GMNAL_NETBUF_MSG(nb) ((gmnal_msg_t *)page_address((nb)->nb_kiov[0].kiov_page)) -#define GMNAL_NETBUF_LOCAL_NETADDR(nb) ((void *)((unsigned long)(nb)->nb_netaddr)) - -typedef struct gmnal_txbuf { - struct list_head txb_list; /* queue on gmni_idle_ltxbs */ - struct gmnal_txbuf *txb_next; /* stash on gmni_ltxs */ - gmnal_netbuf_t txb_buf; /* space */ -} gmnal_txbuf_t; - -typedef struct gmnal_tx { - struct list_head tx_list; /* queue */ - int tx_credit:1; /* consumed a credit? */ - int tx_large_iskiov:1; /* large is in kiovs? */ - struct gmnal_ni *tx_gmni; /* owning NI */ - lnet_nid_t tx_nid; /* destination NID */ - int tx_gmlid; /* destination GM local ID */ - lnet_msg_t *tx_lntmsg; /* lntmsg to finalize on completion */ - - gmnal_netbuf_t tx_buf; /* small tx buffer */ - gmnal_txbuf_t *tx_ltxb; /* large buffer (to free on completion) */ - int tx_msgnob; /* message size (so far) */ - - int tx_large_nob; /* # bytes large buffer payload */ - int tx_large_offset; /* offset within frags */ - int tx_large_niov; /* # VM frags */ - union { - struct iovec *iov; /* mapped frags */ - lnet_kiov_t *kiov; /* page frags */ - } tx_large_frags; - unsigned long tx_launchtime; /* when (in jiffies) the transmit was launched */ - struct gmnal_tx *tx_next; /* stash on gmni_txs */ -} gmnal_tx_t; - -typedef struct gmnal_rx { - struct list_head rx_list; /* enqueue on gmni_rxq for handling */ - int rx_islarge:1; /* large receive buffer? 
*/ - unsigned int rx_recv_nob; /* bytes received */ - __u16 rx_recv_gmid; /* sender */ - __u8 rx_recv_port; /* sender's port */ - __u8 rx_recv_type; /* ?? */ - struct gmnal_rx *rx_next; /* stash on gmni_rxs */ - gmnal_netbuf_t rx_buf; /* the buffer */ -} gmnal_rx_t; - -typedef struct gmnal_ni { - lnet_ni_t *gmni_ni; /* generic NI */ - struct gm_port *gmni_port; /* GM port */ - spinlock_t gmni_gm_lock; /* serialise GM calls */ - int gmni_large_pages; /* # pages in a large message buffer */ - int gmni_large_msgsize; /* nob in large message buffers */ - int gmni_large_gmsize; /* large message GM bucket */ - int gmni_small_msgsize; /* nob in small message buffers */ - int gmni_small_gmsize; /* small message GM bucket */ - __u64 gmni_netaddr_base; /* base of mapped network VM */ - int gmni_netaddr_size; /* # bytes of mapped network VM */ - - gmnal_tx_t *gmni_txs; /* all txs */ - gmnal_rx_t *gmni_rxs; /* all rx descs */ - gmnal_txbuf_t *gmni_ltxbs; /* all large tx bufs */ - - atomic_t gmni_nthreads; /* total # threads */ - gm_alarm_t gmni_alarm; /* alarm to wake caretaker */ - int gmni_shutdown; /* tell all threads to exit */ - - struct list_head gmni_idle_txs; /* idle tx's */ - int gmni_tx_credits; /* # transmits still possible */ - struct list_head gmni_idle_ltxbs; /* idle large tx buffers */ - struct list_head gmni_buf_txq; /* tx's waiting for buffers */ - struct list_head gmni_cred_txq; /* tx's waiting for credits */ - spinlock_t gmni_tx_lock; /* serialise */ - - struct gm_hash *gmni_rx_hash; /* buffer->rx lookup */ - struct semaphore gmni_rx_mutex; /* serialise blocking on GM */ -} gmnal_ni_t; - -typedef struct { - int *gm_port; - int *gm_ntx; - int *gm_credits; - int *gm_peer_credits; - int *gm_nlarge_tx_bufs; - int *gm_nrx_small; - int *gm_nrx_large; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *gm_sysctl; /* sysctl interface */ -#endif -} gmnal_tunables_t; - - -/* gmnal_api.c */ -int gmnal_init(void); -void gmnal_fini(void); -int 
gmnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int gmnal_startup(lnet_ni_t *ni); -void gmnal_shutdown(lnet_ni_t *ni); - -/* gmnal_cb.c */ -int gmnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); - -/* gmnal_util.c */ -void gmnal_free_ltxbufs(gmnal_ni_t *gmni); -int gmnal_alloc_ltxbufs(gmnal_ni_t *gmni); -void gmnal_free_txs(gmnal_ni_t *gmni); -int gmnal_alloc_txs(gmnal_ni_t *gmni); -void gmnal_free_rxs(gmnal_ni_t *gmni); -int gmnal_alloc_rxs(gmnal_ni_t *gmni); -char *gmnal_gmstatus2str(gm_status_t status); -char *gmnal_rxevent2str(gm_recv_event_t *ev); -void gmnal_yield(int delay); - -/* gmnal_comm.c */ -void gmnal_post_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx); -gmnal_tx_t *gmnal_get_tx(gmnal_ni_t *gmni); -void gmnal_tx_done(gmnal_tx_t *tx, int rc); -void gmnal_pack_msg(gmnal_ni_t *gmni, gmnal_msg_t *msg, - lnet_nid_t dstnid, int type); -void gmnal_stop_threads(gmnal_ni_t *gmni); -int gmnal_start_threads(gmnal_ni_t *gmni); -void gmnal_check_txqueues_locked (gmnal_ni_t *gmni); - -/* Module Parameters */ -extern gmnal_tunables_t gmnal_tunables; - -#endif /*__INCLUDE_GMNAL_H__*/ diff --git a/lnet/klnds/gmlnd/gmlnd_api.c b/lnet/klnds/gmlnd/gmlnd_api.c deleted file mode 100644 index a5c426f1b61a181f7e51f8484066943f999cee1a..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_api.c +++ /dev/null @@ -1,262 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Implements the API NAL functions - */ - -#include "gmlnd.h" - -lnd_t the_gmlnd = -{ - .lnd_type = GMLND, - .lnd_startup = gmnal_startup, - .lnd_shutdown = gmnal_shutdown, - .lnd_ctl = gmnal_ctl, - .lnd_send = gmnal_send, - .lnd_recv = gmnal_recv, -}; - -gmnal_ni_t *the_gmni = NULL; - -int -gmnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - - switch (cmd) { - case IOC_LIBCFS_REGISTER_MYNID: - if (data->ioc_nid == ni->ni_nid) - return 0; - - LASSERT (LNET_NIDNET(data->ioc_nid) == LNET_NIDNET(ni->ni_nid)); - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID for %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return 0; - - default: - return (-EINVAL); - } -} - -int -gmnal_set_local_nid (gmnal_ni_t *gmni) -{ - lnet_ni_t *ni = gmni->gmni_ni; - __u32 local_gmid; - __u32 global_gmid; - gm_status_t gm_status; - - /* Called before anything initialised: no need to lock */ - gm_status = gm_get_node_id(gmni->gmni_port, &local_gmid); - if (gm_status != GM_SUCCESS) - return 0; - - CDEBUG(D_NET, "Local node id is [%u]\n", local_gmid); - - gm_status = gm_node_id_to_global_id(gmni->gmni_port, - local_gmid, - &global_gmid); - if (gm_status != GM_SUCCESS) - return 0; - - CDEBUG(D_NET, "Global node id is [%u]\n", global_gmid); - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), global_gmid); - return 1; -} - -void -gmnal_shutdown(lnet_ni_t *ni) -{ - gmnal_ni_t *gmni = ni->ni_data; - - CDEBUG(D_TRACE, "gmnal_api_shutdown: gmni [%p]\n", gmni); - - LASSERT (gmni == 
the_gmni); - - /* stop processing messages */ - gmnal_stop_threads(gmni); - - /* stop all network callbacks */ - gm_close(gmni->gmni_port); - gmni->gmni_port = NULL; - - gm_finalize(); - - gmnal_free_ltxbufs(gmni); - gmnal_free_txs(gmni); - gmnal_free_rxs(gmni); - - LIBCFS_FREE(gmni, sizeof(*gmni)); - - the_gmni = NULL; -} - -int -gmnal_startup(lnet_ni_t *ni) -{ - gmnal_ni_t *gmni = NULL; - gmnal_rx_t *rx = NULL; - gm_status_t gm_status; - int rc; - - LASSERT (ni->ni_lnd == &the_gmlnd); - - ni->ni_maxtxcredits = *gmnal_tunables.gm_credits; - ni->ni_peertxcredits = *gmnal_tunables.gm_peer_credits; - - if (the_gmni != NULL) { - CERROR("Only 1 instance supported\n"); - return -EINVAL; - } - - LIBCFS_ALLOC(gmni, sizeof(*gmni)); - if (gmni == NULL) { - CERROR("can't allocate gmni\n"); - return -ENOMEM; - } - - ni->ni_data = gmni; - - memset(gmni, 0, sizeof(*gmni)); - gmni->gmni_ni = ni; - spin_lock_init(&gmni->gmni_tx_lock); - spin_lock_init(&gmni->gmni_gm_lock); - INIT_LIST_HEAD(&gmni->gmni_idle_txs); - INIT_LIST_HEAD(&gmni->gmni_idle_ltxbs); - INIT_LIST_HEAD(&gmni->gmni_buf_txq); - INIT_LIST_HEAD(&gmni->gmni_cred_txq); - sema_init(&gmni->gmni_rx_mutex, 1); - - /* - * initialise the interface, - */ - CDEBUG(D_NET, "Calling gm_init\n"); - if (gm_init() != GM_SUCCESS) { - CERROR("call to gm_init failed\n"); - goto failed_0; - } - - CDEBUG(D_NET, "Calling gm_open with port [%d], version [%d]\n", - *gmnal_tunables.gm_port, GM_API_VERSION); - - gm_status = gm_open(&gmni->gmni_port, 0, *gmnal_tunables.gm_port, - "gmnal", GM_API_VERSION); - - if (gm_status != GM_SUCCESS) { - CERROR("Can't open GM port %d: %d (%s)\n", - *gmnal_tunables.gm_port, gm_status, - gmnal_gmstatus2str(gm_status)); - goto failed_1; - } - - CDEBUG(D_NET,"gm_open succeeded port[%p]\n",gmni->gmni_port); - - if (!gmnal_set_local_nid(gmni)) - goto failed_2; - - CDEBUG(D_NET, "portals_nid is %s\n", libcfs_nid2str(ni->ni_nid)); - - gmni->gmni_large_msgsize = - offsetof(gmnal_msg_t, 
gmm_u.immediate.gmim_payload[LNET_MAX_PAYLOAD]); - gmni->gmni_large_gmsize = - gm_min_size_for_length(gmni->gmni_large_msgsize); - gmni->gmni_large_pages = - (gmni->gmni_large_msgsize + PAGE_SIZE - 1)/PAGE_SIZE; - - gmni->gmni_small_msgsize = MIN(GM_MTU, PAGE_SIZE); - gmni->gmni_small_gmsize = - gm_min_size_for_length(gmni->gmni_small_msgsize); - - gmni->gmni_netaddr_base = GMNAL_NETADDR_BASE; - gmni->gmni_netaddr_size = 0; - - CDEBUG(D_NET, "Msg size %08x/%08x [%d/%d]\n", - gmni->gmni_large_msgsize, gmni->gmni_small_msgsize, - gmni->gmni_large_gmsize, gmni->gmni_small_gmsize); - - if (gmnal_alloc_rxs(gmni) != 0) { - CERROR("Failed to allocate rx descriptors\n"); - goto failed_2; - } - - if (gmnal_alloc_txs(gmni) != 0) { - CERROR("Failed to allocate tx descriptors\n"); - goto failed_2; - } - - if (gmnal_alloc_ltxbufs(gmni) != 0) { - CERROR("Failed to allocate large tx buffers\n"); - goto failed_2; - } - - rc = gmnal_start_threads(gmni); - if (rc != 0) { - CERROR("Can't start threads: %d\n", rc); - goto failed_2; - } - - /* Start listening */ - for (rx = gmni->gmni_rxs; rx != NULL; rx = rx->rx_next) - gmnal_post_rx(gmni, rx); - - the_gmni = gmni; - - CDEBUG(D_NET, "gmnal_init finished\n"); - return 0; - - failed_2: - gm_close(gmni->gmni_port); - gmni->gmni_port = NULL; - - failed_1: - gm_finalize(); - - failed_0: - /* safe to free descriptors after network has been shut down */ - gmnal_free_ltxbufs(gmni); - gmnal_free_txs(gmni); - gmnal_free_rxs(gmni); - - LIBCFS_FREE(gmni, sizeof(*gmni)); - - return -EIO; -} - -/* - * Called when module loaded - */ -int gmnal_init(void) -{ - lnet_register_lnd(&the_gmlnd); - return 0; -} - -/* - * Called when module removed - */ -void gmnal_fini() -{ - lnet_unregister_lnd(&the_gmlnd); -} diff --git a/lnet/klnds/gmlnd/gmlnd_cb.c b/lnet/klnds/gmlnd/gmlnd_cb.c deleted file mode 100644 index 503bedff16430e7b81a86d8fd71a1f539b669d5f..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_cb.c +++ /dev/null @@ -1,161 +0,0 
@@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -/* - * This file implements the nal cb functions - */ - - -#include "gmlnd.h" - -int -gmnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - gmnal_ni_t *gmni = ni->ni_data; - gmnal_rx_t *rx = (gmnal_rx_t*)private; - gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf); - int npages = rx->rx_islarge ? 
gmni->gmni_large_pages : 1; - int payload_offset = offsetof(gmnal_msg_t, - gmm_u.immediate.gmim_payload[0]); - int nob = payload_offset + mlen; - - LASSERT (msg->gmm_type == GMNAL_MSG_IMMEDIATE); - LASSERT (iov == NULL || kiov == NULL); - - if (rx->rx_recv_nob < nob) { - CERROR("Short message from nid %s: got %d, need %d\n", - libcfs_nid2str(msg->gmm_srcnid), rx->rx_recv_nob, nob); - gmnal_post_rx(gmni, rx); - return -EIO; - } - - if (kiov != NULL) - lnet_copy_kiov2kiov(niov, kiov, offset, - npages, rx->rx_buf.nb_kiov, payload_offset, - mlen); - else - lnet_copy_kiov2iov(niov, iov, offset, - npages, rx->rx_buf.nb_kiov, payload_offset, - mlen); - - lnet_finalize(ni, lntmsg, 0); - gmnal_post_rx(gmni, rx); - return 0; -} - -int -gmnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr= &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int len = lntmsg->msg_len; - gmnal_ni_t *gmni = ni->ni_data; - gm_status_t gmrc; - gmnal_tx_t *tx; - - LASSERT (iov == NULL || kiov == NULL); - - /* I may not block for a tx if I'm responding to an incoming message */ - tx = gmnal_get_tx(gmni); - if (tx == NULL) { - if (!gmni->gmni_shutdown) - CERROR ("Can't get tx for msg type %d for %s\n", - type, libcfs_nid2str(target.nid)); - return -EIO; - } - - tx->tx_nid = target.nid; - - gmrc = gm_global_id_to_node_id(gmni->gmni_port, LNET_NIDADDR(target.nid), - &tx->tx_gmlid); - if (gmrc != GM_SUCCESS) { - CERROR("Can't map Nid %s to a GM local ID: %d\n", - libcfs_nid2str(target.nid), gmrc); - /* NB tx_lntmsg not set => doesn't finalize */ - gmnal_tx_done(tx, -EIO); - return -EIO; - } - - gmnal_pack_msg(gmni, GMNAL_NETBUF_MSG(&tx->tx_buf), - target.nid, GMNAL_MSG_IMMEDIATE); - GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_u.immediate.gmim_hdr = *hdr; - 
tx->tx_msgnob = offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0]); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto test */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_magic = - LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - if (tx->tx_msgnob + len <= gmni->gmni_small_msgsize) { - /* whole message fits in tx_buf */ - char *buffer = &(GMNAL_NETBUF_MSG(&tx->tx_buf)->gmm_u.immediate.gmim_payload[0]); - - if (iov != NULL) - lnet_copy_iov2flat(len, buffer, 0, - niov, iov, offset, len); - else - lnet_copy_kiov2flat(len, buffer, 0, - niov, kiov, offset, len); - - tx->tx_msgnob += len; - tx->tx_large_nob = 0; - } else { - /* stash payload pts to copy later */ - tx->tx_large_nob = len; - tx->tx_large_iskiov = (kiov != NULL); - tx->tx_large_niov = niov; - if (tx->tx_large_iskiov) - tx->tx_large_frags.kiov = kiov; - else - tx->tx_large_frags.iov = iov; - } - - LASSERT(tx->tx_lntmsg == NULL); - tx->tx_lntmsg = lntmsg; - - spin_lock(&gmni->gmni_tx_lock); - - list_add_tail(&tx->tx_list, &gmni->gmni_buf_txq); - gmnal_check_txqueues_locked(gmni); - - spin_unlock(&gmni->gmni_tx_lock); - - return 0; -} diff --git a/lnet/klnds/gmlnd/gmlnd_comm.c b/lnet/klnds/gmlnd/gmlnd_comm.c deleted file mode 100644 index ea6a8d142549e6e4a734962351892f9a5366a051..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_comm.c +++ /dev/null @@ -1,563 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published 
by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * This file contains all gmnal send and receive functions - */ - -#include "gmlnd.h" - -void -gmnal_notify_peer_down(gmnal_tx_t *tx) -{ - struct timeval now; - time_t then; - - do_gettimeofday (&now); - then = now.tv_sec - (jiffies - tx->tx_launchtime)/HZ; - - lnet_notify(tx->tx_gmni->gmni_ni, tx->tx_nid, 0, then); -} - -void -gmnal_pack_msg(gmnal_ni_t *gmni, gmnal_msg_t *msg, - lnet_nid_t dstnid, int type) -{ - /* CAVEAT EMPTOR! this only sets the common message fields. */ - msg->gmm_magic = GMNAL_MSG_MAGIC; - msg->gmm_version = GMNAL_MSG_VERSION; - msg->gmm_type = type; - msg->gmm_srcnid = lnet_ptlcompat_srcnid(gmni->gmni_ni->ni_nid, - dstnid); - msg->gmm_dstnid = dstnid; -} - -int -gmnal_unpack_msg(gmnal_ni_t *gmni, gmnal_rx_t *rx) -{ - gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf); - const int hdr_size = offsetof(gmnal_msg_t, gmm_u); - int buffnob = rx->rx_islarge ? 
gmni->gmni_large_msgsize : - gmni->gmni_small_msgsize; - int flip; - - /* rc = 0:SUCCESS -ve:failure +ve:version mismatch */ - - /* GM may not overflow our buffer */ - LASSERT (rx->rx_recv_nob <= buffnob); - - /* 6 bytes are enough to have received magic + version */ - if (rx->rx_recv_nob < 6) { - CERROR("Short message from gmid %u: %d\n", - rx->rx_recv_gmid, rx->rx_recv_nob); - return -EPROTO; - } - - if (msg->gmm_magic == GMNAL_MSG_MAGIC) { - flip = 0; - } else if (msg->gmm_magic == __swab32(GMNAL_MSG_MAGIC)) { - flip = 1; - } else if (msg->gmm_magic == LNET_PROTO_MAGIC || - msg->gmm_magic == __swab32(LNET_PROTO_MAGIC)) { - return EPROTO; - } else { - CERROR("Bad magic from gmid %u: %08x\n", - rx->rx_recv_gmid, msg->gmm_magic); - return -EPROTO; - } - - if (msg->gmm_version != - (flip ? __swab16(GMNAL_MSG_VERSION) : GMNAL_MSG_VERSION)) { - return EPROTO; - } - - if (rx->rx_recv_nob < hdr_size) { - CERROR("Short message from %u: %d\n", - rx->rx_recv_gmid, rx->rx_recv_nob); - return -EPROTO; - } - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - __swab16s(&msg->gmm_version); - __swab16s(&msg->gmm_type); - __swab64s(&msg->gmm_srcnid); - __swab64s(&msg->gmm_dstnid); - } - - if (msg->gmm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid from %u: %s\n", - rx->rx_recv_gmid, libcfs_nid2str(msg->gmm_srcnid)); - return -EPROTO; - } - - if (!lnet_ptlcompat_matchnid(gmni->gmni_ni->ni_nid, - msg->gmm_dstnid)) { - CERROR("Bad dst nid from %u: %s\n", - rx->rx_recv_gmid, libcfs_nid2str(msg->gmm_dstnid)); - return -EPROTO; - } - - switch (msg->gmm_type) { - default: - CERROR("Unknown message type from %u: %x\n", - rx->rx_recv_gmid, msg->gmm_type); - return -EPROTO; - - case GMNAL_MSG_IMMEDIATE: - if (rx->rx_recv_nob < offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0])) { - CERROR("Short IMMEDIATE from %u: %d("LPSZ")\n", - rx->rx_recv_gmid, rx->rx_recv_nob, - offsetof(gmnal_msg_t, gmm_u.immediate.gmim_payload[0])); - return -EPROTO; - } - break; - } - 
return 0; -} - -gmnal_tx_t * -gmnal_get_tx(gmnal_ni_t *gmni) -{ - gmnal_tx_t *tx = NULL; - - spin_lock(&gmni->gmni_tx_lock); - - if (gmni->gmni_shutdown || - list_empty(&gmni->gmni_idle_txs)) { - spin_unlock(&gmni->gmni_tx_lock); - return NULL; - } - - tx = list_entry(gmni->gmni_idle_txs.next, gmnal_tx_t, tx_list); - list_del(&tx->tx_list); - - spin_unlock(&gmni->gmni_tx_lock); - - LASSERT (tx->tx_lntmsg == NULL); - LASSERT (tx->tx_ltxb == NULL); - LASSERT (!tx->tx_credit); - - return tx; -} - -void -gmnal_tx_done(gmnal_tx_t *tx, int rc) -{ - gmnal_ni_t *gmni = tx->tx_gmni; - int wake_sched = 0; - lnet_msg_t *lnetmsg = tx->tx_lntmsg; - - tx->tx_lntmsg = NULL; - - spin_lock(&gmni->gmni_tx_lock); - - if (tx->tx_ltxb != NULL) { - wake_sched = 1; - list_add_tail(&tx->tx_ltxb->txb_list, &gmni->gmni_idle_ltxbs); - tx->tx_ltxb = NULL; - } - - if (tx->tx_credit) { - wake_sched = 1; - gmni->gmni_tx_credits++; - tx->tx_credit = 0; - } - - list_add_tail(&tx->tx_list, &gmni->gmni_idle_txs); - - if (wake_sched) - gmnal_check_txqueues_locked(gmni); - - spin_unlock(&gmni->gmni_tx_lock); - - /* Delay finalize until tx is free */ - if (lnetmsg != NULL) - lnet_finalize(gmni->gmni_ni, lnetmsg, 0); -} - -void -gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, - gm_status_t status) -{ - gmnal_tx_t *tx = (gmnal_tx_t*)context; - - LASSERT(!in_interrupt()); - - CDEBUG(D_NET, "status for tx [%p] is [%d][%s], nid %s\n", - tx, status, gmnal_gmstatus2str(status), - libcfs_nid2str(tx->tx_nid)); - - gmnal_tx_done(tx, -EIO); -} - -void -gmnal_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status) -{ - gmnal_tx_t *tx = (gmnal_tx_t*)context; - gmnal_ni_t *gmni = tx->tx_gmni; - - LASSERT(!in_interrupt()); - - switch(status) { - case GM_SUCCESS: - gmnal_tx_done(tx, 0); - return; - - case GM_SEND_DROPPED: - CDEBUG(D_NETERROR, "Dropped tx %p to %s\n", - tx, libcfs_nid2str(tx->tx_nid)); - /* Another tx failed and called gm_drop_sends() which made this - * one complete 
immediately */ - gmnal_tx_done(tx, -EIO); - return; - - default: - /* Some error; NB don't complete tx yet; we need its credit for - * gm_drop_sends() */ - CDEBUG(D_NETERROR, "tx %p error %d(%s), nid %s\n", - tx, status, gmnal_gmstatus2str(status), - libcfs_nid2str(tx->tx_nid)); - - gmnal_notify_peer_down(tx); - - spin_lock(&gmni->gmni_gm_lock); - gm_drop_sends(gmni->gmni_port, - tx->tx_ltxb != NULL ? - GMNAL_LARGE_PRIORITY : GMNAL_SMALL_PRIORITY, - tx->tx_gmlid, *gmnal_tunables.gm_port, - gmnal_drop_sends_callback, tx); - spin_unlock(&gmni->gmni_gm_lock); - return; - } - - /* not reached */ - LBUG(); -} - -void -gmnal_check_txqueues_locked (gmnal_ni_t *gmni) -{ - gmnal_tx_t *tx; - gmnal_txbuf_t *ltxb; - int gmsize; - int pri; - void *netaddr; - - tx = list_empty(&gmni->gmni_buf_txq) ? NULL : - list_entry(gmni->gmni_buf_txq.next, gmnal_tx_t, tx_list); - - if (tx != NULL && - (tx->tx_large_nob == 0 || - !list_empty(&gmni->gmni_idle_ltxbs))) { - - /* consume tx */ - list_del(&tx->tx_list); - - LASSERT (tx->tx_ltxb == NULL); - - if (tx->tx_large_nob != 0) { - ltxb = list_entry(gmni->gmni_idle_ltxbs.next, - gmnal_txbuf_t, txb_list); - - /* consume large buffer */ - list_del(<xb->txb_list); - - spin_unlock(&gmni->gmni_tx_lock); - - /* Unlocking here allows sends to get re-ordered, - * but we want to allow other CPUs to progress... */ - - tx->tx_ltxb = ltxb; - - /* marshall message in tx_ltxb... - * 1. Copy what was marshalled so far (in tx_buf) */ - memcpy(GMNAL_NETBUF_MSG(<xb->txb_buf), - GMNAL_NETBUF_MSG(&tx->tx_buf), tx->tx_msgnob); - - /* 2. 
Copy the payload */ - if (tx->tx_large_iskiov) - lnet_copy_kiov2kiov( - gmni->gmni_large_pages, - ltxb->txb_buf.nb_kiov, - tx->tx_msgnob, - tx->tx_large_niov, - tx->tx_large_frags.kiov, - tx->tx_large_offset, - tx->tx_large_nob); - else - lnet_copy_iov2kiov( - gmni->gmni_large_pages, - ltxb->txb_buf.nb_kiov, - tx->tx_msgnob, - tx->tx_large_niov, - tx->tx_large_frags.iov, - tx->tx_large_offset, - tx->tx_large_nob); - - tx->tx_msgnob += tx->tx_large_nob; - - spin_lock(&gmni->gmni_tx_lock); - } - - list_add_tail(&tx->tx_list, &gmni->gmni_cred_txq); - } - - if (!list_empty(&gmni->gmni_cred_txq) && - gmni->gmni_tx_credits != 0) { - - tx = list_entry(gmni->gmni_cred_txq.next, gmnal_tx_t, tx_list); - - /* consume tx and 1 credit */ - list_del(&tx->tx_list); - gmni->gmni_tx_credits--; - - spin_unlock(&gmni->gmni_tx_lock); - - /* Unlocking here allows sends to get re-ordered, but we want - * to allow other CPUs to progress... */ - - LASSERT(!tx->tx_credit); - tx->tx_credit = 1; - - tx->tx_launchtime = jiffies; - - if (tx->tx_msgnob <= gmni->gmni_small_msgsize) { - LASSERT (tx->tx_ltxb == NULL); - netaddr = GMNAL_NETBUF_LOCAL_NETADDR(&tx->tx_buf); - gmsize = gmni->gmni_small_gmsize; - pri = GMNAL_SMALL_PRIORITY; - } else { - LASSERT (tx->tx_ltxb != NULL); - netaddr = GMNAL_NETBUF_LOCAL_NETADDR(&tx->tx_ltxb->txb_buf); - gmsize = gmni->gmni_large_gmsize; - pri = GMNAL_LARGE_PRIORITY; - } - - spin_lock(&gmni->gmni_gm_lock); - - gm_send_to_peer_with_callback(gmni->gmni_port, - netaddr, gmsize, - tx->tx_msgnob, - pri, - tx->tx_gmlid, - gmnal_tx_callback, - (void*)tx); - - spin_unlock(&gmni->gmni_gm_lock); - spin_lock(&gmni->gmni_tx_lock); - } -} - -void -gmnal_post_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx) -{ - int gmsize = rx->rx_islarge ? gmni->gmni_large_gmsize : - gmni->gmni_small_gmsize; - int pri = rx->rx_islarge ? 
GMNAL_LARGE_PRIORITY : - GMNAL_SMALL_PRIORITY; - void *buffer = GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf); - - CDEBUG(D_NET, "posting rx %p buf %p\n", rx, buffer); - - spin_lock(&gmni->gmni_gm_lock); - gm_provide_receive_buffer_with_tag(gmni->gmni_port, - buffer, gmsize, pri, 0); - spin_unlock(&gmni->gmni_gm_lock); -} - -void -gmnal_version_reply (gmnal_ni_t *gmni, gmnal_rx_t *rx) -{ - /* Future protocol version compatibility support! - * The next gmlnd-specific protocol rev will first send a message to - * check version; I reply with a stub message containing my current - * magic+version... */ - gmnal_msg_t *msg; - gmnal_tx_t *tx = gmnal_get_tx(gmni); - - if (tx == NULL) { - CERROR("Can't allocate tx to send version info to %u\n", - rx->rx_recv_gmid); - return; - } - - LASSERT (tx->tx_lntmsg == NULL); /* no finalize */ - - tx->tx_nid = LNET_NID_ANY; - tx->tx_gmlid = rx->rx_recv_gmid; - - msg = GMNAL_NETBUF_MSG(&tx->tx_buf); - msg->gmm_magic = GMNAL_MSG_MAGIC; - msg->gmm_version = GMNAL_MSG_VERSION; - - /* just send magic + version */ - tx->tx_msgnob = offsetof(gmnal_msg_t, gmm_type); - tx->tx_large_nob = 0; - - spin_lock(&gmni->gmni_tx_lock); - - list_add_tail(&tx->tx_list, &gmni->gmni_buf_txq); - gmnal_check_txqueues_locked(gmni); - - spin_unlock(&gmni->gmni_tx_lock); -} - -int -gmnal_rx_thread(void *arg) -{ - gmnal_ni_t *gmni = arg; - gm_recv_event_t *rxevent = NULL; - gm_recv_t *recv = NULL; - gmnal_rx_t *rx; - int rc; - - cfs_daemonize("gmnal_rxd"); - - down(&gmni->gmni_rx_mutex); - - while (!gmni->gmni_shutdown) { - - spin_lock(&gmni->gmni_gm_lock); - rxevent = gm_blocking_receive_no_spin(gmni->gmni_port); - spin_unlock(&gmni->gmni_gm_lock); - - switch (GM_RECV_EVENT_TYPE(rxevent)) { - default: - gm_unknown(gmni->gmni_port, rxevent); - continue; - - case GM_FAST_RECV_EVENT: - case GM_FAST_PEER_RECV_EVENT: - case GM_PEER_RECV_EVENT: - case GM_FAST_HIGH_RECV_EVENT: - case GM_FAST_HIGH_PEER_RECV_EVENT: - case GM_HIGH_PEER_RECV_EVENT: - case GM_RECV_EVENT: - case 
GM_HIGH_RECV_EVENT: - break; - } - - recv = &rxevent->recv; - rx = gm_hash_find(gmni->gmni_rx_hash, - gm_ntohp(recv->buffer)); - LASSERT (rx != NULL); - - rx->rx_recv_nob = gm_ntoh_u32(recv->length); - rx->rx_recv_gmid = gm_ntoh_u16(recv->sender_node_id); - rx->rx_recv_port = gm_ntoh_u8(recv->sender_port_id); - rx->rx_recv_type = gm_ntoh_u8(recv->type); - - switch (GM_RECV_EVENT_TYPE(rxevent)) { - case GM_FAST_RECV_EVENT: - case GM_FAST_PEER_RECV_EVENT: - case GM_FAST_HIGH_RECV_EVENT: - case GM_FAST_HIGH_PEER_RECV_EVENT: - LASSERT (rx->rx_recv_nob <= PAGE_SIZE); - - memcpy(GMNAL_NETBUF_MSG(&rx->rx_buf), - gm_ntohp(recv->message), rx->rx_recv_nob); - break; - } - - up(&gmni->gmni_rx_mutex); - - CDEBUG (D_NET, "rx %p: buf %p(%p) nob %d\n", rx, - GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf), - gm_ntohp(recv->buffer), rx->rx_recv_nob); - - /* We're connectionless: simply drop packets with - * errors */ - rc = gmnal_unpack_msg(gmni, rx); - - if (rc == 0) { - gmnal_msg_t *msg = GMNAL_NETBUF_MSG(&rx->rx_buf); - - LASSERT (msg->gmm_type == GMNAL_MSG_IMMEDIATE); - rc = lnet_parse(gmni->gmni_ni, - &msg->gmm_u.immediate.gmim_hdr, - msg->gmm_srcnid, - rx, 0); - } else if (rc > 0) { - gmnal_version_reply(gmni, rx); - rc = -EPROTO; /* repost rx */ - } - - if (rc < 0) /* parse failure */ - gmnal_post_rx(gmni, rx); - - down(&gmni->gmni_rx_mutex); - } - - up(&gmni->gmni_rx_mutex); - - CDEBUG(D_NET, "exiting\n"); - atomic_dec(&gmni->gmni_nthreads); - return 0; -} - -void -gmnal_stop_threads(gmnal_ni_t *gmni) -{ - int count = 2; - - gmni->gmni_shutdown = 1; - mb(); - - /* wake rxthread owning gmni_rx_mutex with an alarm. 
*/ - spin_lock(&gmni->gmni_gm_lock); - gm_set_alarm(gmni->gmni_port, &gmni->gmni_alarm, 0, NULL, NULL); - spin_unlock(&gmni->gmni_gm_lock); - - while (atomic_read(&gmni->gmni_nthreads) != 0) { - count++; - if ((count & (count - 1)) == 0) - CWARN("Waiting for %d threads to stop\n", - atomic_read(&gmni->gmni_nthreads)); - gmnal_yield(1); - } -} - -int -gmnal_start_threads(gmnal_ni_t *gmni) -{ - int i; - int pid; - - LASSERT (!gmni->gmni_shutdown); - LASSERT (atomic_read(&gmni->gmni_nthreads) == 0); - - gm_initialize_alarm(&gmni->gmni_alarm); - - for (i = 0; i < num_online_cpus(); i++) { - - pid = kernel_thread(gmnal_rx_thread, (void*)gmni, 0); - if (pid < 0) { - CERROR("rx thread failed to start: %d\n", pid); - gmnal_stop_threads(gmni); - return pid; - } - - atomic_inc(&gmni->gmni_nthreads); - } - - return 0; -} diff --git a/lnet/klnds/gmlnd/gmlnd_module.c b/lnet/klnds/gmlnd/gmlnd_module.c deleted file mode 100644 index 114a286a53307246a93ba840b334d0ca2c34ca01..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_module.c +++ /dev/null @@ -1,130 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "gmlnd.h" - - -static int port = 4; -CFS_MODULE_PARM(port, "i", int, 0444, - "GM port to use for communications"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# tx descriptors"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends per peer"); - -static int nlarge_tx_bufs = 32; -CFS_MODULE_PARM(nlarge_tx_bufs, "i", int, 0444, - "# large tx message buffers"); - -static int nrx_small = 128; -CFS_MODULE_PARM(nrx_small, "i", int, 0444, - "# small rx message buffers"); - -static int nrx_large = 64; -CFS_MODULE_PARM(nrx_large, "i", int, 0444, - "# large rx message buffers"); - -gmnal_tunables_t gmnal_tunables = { - .gm_port = &port, - .gm_ntx = &ntx, - .gm_credits = &credits, - .gm_peer_credits = &peer_credits, - .gm_nlarge_tx_bufs = &nlarge_tx_bufs, - .gm_nrx_small = &nrx_small, - .gm_nrx_large = &nrx_large, -}; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -static ctl_table gmnal_ctl_table[] = { - {1, "port", &port, - sizeof (int), 0444, NULL, &proc_dointvec}, - {2, "ntx", &ntx, - sizeof (int), 0444, NULL, &proc_dointvec}, - {3, "credits", &credits, - sizeof (int), 0444, NULL, &proc_dointvec}, - {4, "peer_credits", &peer_credits, - sizeof (int), 0444, NULL, &proc_dointvec}, - {5, "nlarge_tx_bufs", &nlarge_tx_bufs, - sizeof (int), 0444, NULL, &proc_dointvec}, - {6, "nrx_small", &nrx_small, - sizeof (int), 0444, NULL, &proc_dointvec}, - {7, "nrx_large", &nrx_large, - sizeof (int), 0444, NULL, &proc_dointvec}, - {0} -}; - -static ctl_table gmnal_top_ctl_table[] = { - {207, "gmnal", NULL, 0, 0555, gmnal_ctl_table}, - {0} -}; -#endif - -static int __init -gmnal_load(void) -{ - int status; - CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n"); - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - gmnal_tunables.gm_sysctl = - register_sysctl_table(gmnal_top_ctl_table, 0); - - if 
(gmnal_tunables.gm_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); -#endif - CDEBUG(D_NET, "Calling gmnal_init\n"); - status = gmnal_init(); - if (status == 0) { - CDEBUG(D_NET, "Portals GMNAL initialised ok\n"); - } else { - CDEBUG(D_NET, "Portals GMNAL Failed to initialise\n"); - return(-ENODEV); - } - - CDEBUG(D_NET, "This is the end of the gmnal init routine"); - - return(0); -} - -static void __exit -gmnal_unload(void) -{ - gmnal_fini(); -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - if (gmnal_tunables.gm_sysctl != NULL) - unregister_sysctl_table(gmnal_tunables.gm_sysctl); -#endif -} - -module_init(gmnal_load); -module_exit(gmnal_unload); - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel GM LND v1.01"); -MODULE_LICENSE("GPL"); diff --git a/lnet/klnds/gmlnd/gmlnd_utils.c b/lnet/klnds/gmlnd/gmlnd_utils.c deleted file mode 100644 index 98107317b463a561f42751bb4d96755503bdf09d..0000000000000000000000000000000000000000 --- a/lnet/klnds/gmlnd/gmlnd_utils.c +++ /dev/null @@ -1,579 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "gmlnd.h" - -void -gmnal_free_netbuf_pages (gmnal_netbuf_t *nb, int npages) -{ - int i; - - for (i = 0; i < npages; i++) - __free_page(nb->nb_kiov[i].kiov_page); -} - -int -gmnal_alloc_netbuf_pages (gmnal_ni_t *gmni, gmnal_netbuf_t *nb, int npages) -{ - int i; - gm_status_t gmrc; - - LASSERT (npages > 0); - - for (i = 0; i < npages; i++) { - - nb->nb_kiov[i].kiov_page = alloc_page(GFP_KERNEL); - nb->nb_kiov[i].kiov_offset = 0; - nb->nb_kiov[i].kiov_len = PAGE_SIZE; - - if (nb->nb_kiov[i].kiov_page == NULL) { - CERROR("Can't allocate page\n"); - gmnal_free_netbuf_pages(nb, i); - return -ENOMEM; - } - - CDEBUG(D_NET,"[%3d] page %p, phys "LPX64", @ "LPX64"\n", - i, nb->nb_kiov[i].kiov_page, - lnet_page2phys(nb->nb_kiov[i].kiov_page), - gmni->gmni_netaddr_base); - - gmrc = gm_register_memory_ex_phys( - gmni->gmni_port, - lnet_page2phys(nb->nb_kiov[i].kiov_page), - PAGE_SIZE, - gmni->gmni_netaddr_base); - CDEBUG(D_NET,"[%3d] page %p: %d\n", - i, nb->nb_kiov[i].kiov_page, gmrc); - - if (gmrc != GM_SUCCESS) { - CERROR("Can't map page: %d(%s)\n", gmrc, - gmnal_gmstatus2str(gmrc)); - gmnal_free_netbuf_pages(nb, i+1); - return -ENOMEM; - } - - if (i == 0) - nb->nb_netaddr = gmni->gmni_netaddr_base; - - gmni->gmni_netaddr_base += PAGE_SIZE; - } - - return 0; -} - -void -gmnal_free_ltxbuf (gmnal_ni_t *gmni, gmnal_txbuf_t *txb) -{ - int npages = gmni->gmni_large_pages; - - LASSERT (gmni->gmni_port == NULL); - /* No unmapping; the port has been closed */ - - gmnal_free_netbuf_pages(&txb->txb_buf, gmni->gmni_large_pages); - LIBCFS_FREE(txb, offsetof(gmnal_txbuf_t, txb_buf.nb_kiov[npages])); -} - -int -gmnal_alloc_ltxbuf (gmnal_ni_t *gmni) -{ - int npages = gmni->gmni_large_pages; - int sz = offsetof(gmnal_txbuf_t, txb_buf.nb_kiov[npages]); - gmnal_txbuf_t *txb; - int rc; - - LIBCFS_ALLOC(txb, sz); - if (txb == NULL) { - CERROR("Can't allocate large txbuffer\n"); - return -ENOMEM; - } - - rc = gmnal_alloc_netbuf_pages(gmni, &txb->txb_buf, npages); - if (rc != 0) { 
- LIBCFS_FREE(txb, sz); - return rc; - } - - list_add_tail(&txb->txb_list, &gmni->gmni_idle_ltxbs); - - txb->txb_next = gmni->gmni_ltxbs; - gmni->gmni_ltxbs = txb; - - return 0; -} - -void -gmnal_free_tx (gmnal_tx_t *tx) -{ - LASSERT (tx->tx_gmni->gmni_port == NULL); - - gmnal_free_netbuf_pages(&tx->tx_buf, 1); - LIBCFS_FREE(tx, sizeof(*tx)); -} - -int -gmnal_alloc_tx (gmnal_ni_t *gmni) -{ - gmnal_tx_t *tx; - int rc; - - LIBCFS_ALLOC(tx, sizeof(*tx)); - if (tx == NULL) { - CERROR("Failed to allocate tx\n"); - return -ENOMEM; - } - - memset(tx, 0, sizeof(*tx)); - - rc = gmnal_alloc_netbuf_pages(gmni, &tx->tx_buf, 1); - if (rc != 0) { - LIBCFS_FREE(tx, sizeof(*tx)); - return -ENOMEM; - } - - tx->tx_gmni = gmni; - - list_add_tail(&tx->tx_list, &gmni->gmni_idle_txs); - - tx->tx_next = gmni->gmni_txs; - gmni->gmni_txs = tx; - - return 0; -} - -void -gmnal_free_rx(gmnal_ni_t *gmni, gmnal_rx_t *rx) -{ - int npages = rx->rx_islarge ? gmni->gmni_large_pages : 1; - - LASSERT (gmni->gmni_port == NULL); - - gmnal_free_netbuf_pages(&rx->rx_buf, npages); - LIBCFS_FREE(rx, offsetof(gmnal_rx_t, rx_buf.nb_kiov[npages])); -} - -int -gmnal_alloc_rx (gmnal_ni_t *gmni, int islarge) -{ - int npages = islarge ? 
gmni->gmni_large_pages : 1; - int sz = offsetof(gmnal_rx_t, rx_buf.nb_kiov[npages]); - int rc; - gmnal_rx_t *rx; - gm_status_t gmrc; - - LIBCFS_ALLOC(rx, sz); - if (rx == NULL) { - CERROR("Failed to allocate rx\n"); - return -ENOMEM; - } - - memset(rx, 0, sizeof(*rx)); - - rc = gmnal_alloc_netbuf_pages(gmni, &rx->rx_buf, npages); - if (rc != 0) { - LIBCFS_FREE(rx, sz); - return rc; - } - - rx->rx_islarge = islarge; - rx->rx_next = gmni->gmni_rxs; - gmni->gmni_rxs = rx; - - gmrc = gm_hash_insert(gmni->gmni_rx_hash, - GMNAL_NETBUF_LOCAL_NETADDR(&rx->rx_buf), rx); - if (gmrc != GM_SUCCESS) { - CERROR("Couldn't add rx to hash table: %d\n", gmrc); - return -ENOMEM; - } - - return 0; -} - -void -gmnal_free_ltxbufs (gmnal_ni_t *gmni) -{ - gmnal_txbuf_t *txb; - - while ((txb = gmni->gmni_ltxbs) != NULL) { - gmni->gmni_ltxbs = txb->txb_next; - gmnal_free_ltxbuf(gmni, txb); - } -} - -int -gmnal_alloc_ltxbufs (gmnal_ni_t *gmni) -{ - int nlarge_tx_bufs = *gmnal_tunables.gm_nlarge_tx_bufs; - int i; - int rc; - - for (i = 0; i < nlarge_tx_bufs; i++) { - rc = gmnal_alloc_ltxbuf(gmni); - - if (rc != 0) - return rc; - } - - return 0; -} - -void -gmnal_free_txs(gmnal_ni_t *gmni) -{ - gmnal_tx_t *tx; - - while ((tx = gmni->gmni_txs) != NULL) { - gmni->gmni_txs = tx->tx_next; - gmnal_free_tx (tx); - } -} - -int -gmnal_alloc_txs(gmnal_ni_t *gmni) -{ - int ntxcred = gm_num_send_tokens(gmni->gmni_port); - int ntx = *gmnal_tunables.gm_ntx; - int i; - int rc; - - CDEBUG(D_NET, "ntxcred: %d\n", ntxcred); - gmni->gmni_tx_credits = ntxcred; - - for (i = 0; i < ntx; i++) { - rc = gmnal_alloc_tx(gmni); - if (rc != 0) - return rc; - } - - return 0; -} - -void -gmnal_free_rxs(gmnal_ni_t *gmni) -{ - gmnal_rx_t *rx; - - while ((rx = gmni->gmni_rxs) != NULL) { - gmni->gmni_rxs = rx->rx_next; - - gmnal_free_rx(gmni, rx); - } - - LASSERT (gmni->gmni_port == NULL); -#if 0 - /* GM releases all resources allocated to a port when it closes */ - if (gmni->gmni_rx_hash != NULL) - 
gm_destroy_hash(gmni->gmni_rx_hash); -#endif -} - -int -gmnal_alloc_rxs (gmnal_ni_t *gmni) -{ - int nrxcred = gm_num_receive_tokens(gmni->gmni_port); - int nrx_small = *gmnal_tunables.gm_nrx_small; - int nrx_large = *gmnal_tunables.gm_nrx_large; - int nrx = nrx_large + nrx_small; - int rc; - int i; - - CDEBUG(D_NET, "nrxcred: %d(%dL+%dS)\n", nrxcred, nrx_large, nrx_small); - - if (nrx > nrxcred) { - int nlarge = (nrx_large * nrxcred)/nrx; - int nsmall = nrxcred - nlarge; - - CWARN("Only %d rx credits: " - "reducing large %d->%d, small %d->%d\n", nrxcred, - nrx_large, nlarge, nrx_small, nsmall); - - *gmnal_tunables.gm_nrx_large = nrx_large = nlarge; - *gmnal_tunables.gm_nrx_small = nrx_small = nsmall; - nrx = nlarge + nsmall; - } - - gmni->gmni_rx_hash = gm_create_hash(gm_hash_compare_ptrs, - gm_hash_hash_ptr, 0, 0, nrx, 0); - if (gmni->gmni_rx_hash == NULL) { - CERROR("Failed to create hash table\n"); - return -ENOMEM; - } - - for (i = 0; i < nrx; i++ ) { - rc = gmnal_alloc_rx(gmni, i < nrx_large); - if (rc != 0) - return rc; - } - - return 0; -} - -char * -gmnal_gmstatus2str(gm_status_t status) -{ - return(gm_strerror(status)); - - switch(status) { - case(GM_SUCCESS): - return("SUCCESS"); - case(GM_FAILURE): - return("FAILURE"); - case(GM_INPUT_BUFFER_TOO_SMALL): - return("INPUT_BUFFER_TOO_SMALL"); - case(GM_OUTPUT_BUFFER_TOO_SMALL): - return("OUTPUT_BUFFER_TOO_SMALL"); - case(GM_TRY_AGAIN ): - return("TRY_AGAIN"); - case(GM_BUSY): - return("BUSY"); - case(GM_MEMORY_FAULT): - return("MEMORY_FAULT"); - case(GM_INTERRUPTED): - return("INTERRUPTED"); - case(GM_INVALID_PARAMETER): - return("INVALID_PARAMETER"); - case(GM_OUT_OF_MEMORY): - return("OUT_OF_MEMORY"); - case(GM_INVALID_COMMAND): - return("INVALID_COMMAND"); - case(GM_PERMISSION_DENIED): - return("PERMISSION_DENIED"); - case(GM_INTERNAL_ERROR): - return("INTERNAL_ERROR"); - case(GM_UNATTACHED): - return("UNATTACHED"); - case(GM_UNSUPPORTED_DEVICE): - return("UNSUPPORTED_DEVICE"); - case(GM_SEND_TIMED_OUT): 
- return("GM_SEND_TIMEDOUT"); - case(GM_SEND_REJECTED): - return("GM_SEND_REJECTED"); - case(GM_SEND_TARGET_PORT_CLOSED): - return("GM_SEND_TARGET_PORT_CLOSED"); - case(GM_SEND_TARGET_NODE_UNREACHABLE): - return("GM_SEND_TARGET_NODE_UNREACHABLE"); - case(GM_SEND_DROPPED): - return("GM_SEND_DROPPED"); - case(GM_SEND_PORT_CLOSED): - return("GM_SEND_PORT_CLOSED"); - case(GM_NODE_ID_NOT_YET_SET): - return("GM_NODE_ID_NOT_YET_SET"); - case(GM_STILL_SHUTTING_DOWN): - return("GM_STILL_SHUTTING_DOWN"); - case(GM_CLONE_BUSY): - return("GM_CLONE_BUSY"); - case(GM_NO_SUCH_DEVICE): - return("GM_NO_SUCH_DEVICE"); - case(GM_ABORTED): - return("GM_ABORTED"); - case(GM_INCOMPATIBLE_LIB_AND_DRIVER): - return("GM_INCOMPATIBLE_LIB_AND_DRIVER"); - case(GM_UNTRANSLATED_SYSTEM_ERROR): - return("GM_UNTRANSLATED_SYSTEM_ERROR"); - case(GM_ACCESS_DENIED): - return("GM_ACCESS_DENIED"); - - - /* - * These ones are in the docs but aren't in the header file - case(GM_DEV_NOT_FOUND): - return("GM_DEV_NOT_FOUND"); - case(GM_INVALID_PORT_NUMBER): - return("GM_INVALID_PORT_NUMBER"); - case(GM_UC_ERROR): - return("GM_US_ERROR"); - case(GM_PAGE_TABLE_FULL): - return("GM_PAGE_TABLE_FULL"); - case(GM_MINOR_OVERFLOW): - return("GM_MINOR_OVERFLOW"); - case(GM_SEND_ORPHANED): - return("GM_SEND_ORPHANED"); - case(GM_HARDWARE_FAULT): - return("GM_HARDWARE_FAULT"); - case(GM_DATA_CORRUPTED): - return("GM_DATA_CORRUPTED"); - case(GM_TIMED_OUT): - return("GM_TIMED_OUT"); - case(GM_USER_ERROR): - return("GM_USER_ERROR"); - case(GM_NO_MATCH): - return("GM_NOMATCH"); - case(GM_NOT_SUPPORTED_IN_KERNEL): - return("GM_NOT_SUPPORTED_IN_KERNEL"); - case(GM_NOT_SUPPORTED_ON_ARCH): - return("GM_NOT_SUPPORTED_ON_ARCH"); - case(GM_PTE_REF_CNT_OVERFLOW): - return("GM_PTR_REF_CNT_OVERFLOW"); - case(GM_NO_DRIVER_SUPPORT): - return("GM_NO_DRIVER_SUPPORT"); - case(GM_FIRMWARE_NOT_RUNNING): - return("GM_FIRMWARE_NOT_RUNNING"); - * These ones are in the docs but aren't in the header file - */ - - default: - return("UNKNOWN GM 
ERROR CODE"); - } -} - - -char * -gmnal_rxevent2str(gm_recv_event_t *ev) -{ - short event; - event = GM_RECV_EVENT_TYPE(ev); - switch(event) { - case(GM_NO_RECV_EVENT): - return("GM_NO_RECV_EVENT"); - case(GM_SENDS_FAILED_EVENT): - return("GM_SEND_FAILED_EVENT"); - case(GM_ALARM_EVENT): - return("GM_ALARM_EVENT"); - case(GM_SENT_EVENT): - return("GM_SENT_EVENT"); - case(_GM_SLEEP_EVENT): - return("_GM_SLEEP_EVENT"); - case(GM_RAW_RECV_EVENT): - return("GM_RAW_RECV_EVENT"); - case(GM_BAD_SEND_DETECTED_EVENT): - return("GM_BAD_SEND_DETECTED_EVENT"); - case(GM_SEND_TOKEN_VIOLATION_EVENT): - return("GM_SEND_TOKEN_VIOLATION_EVENT"); - case(GM_RECV_TOKEN_VIOLATION_EVENT): - return("GM_RECV_TOKEN_VIOLATION_EVENT"); - case(GM_BAD_RECV_TOKEN_EVENT): - return("GM_BAD_RECV_TOKEN_EVENT"); - case(GM_ALARM_VIOLATION_EVENT): - return("GM_ALARM_VIOLATION_EVENT"); - case(GM_RECV_EVENT): - return("GM_RECV_EVENT"); - case(GM_HIGH_RECV_EVENT): - return("GM_HIGH_RECV_EVENT"); - case(GM_PEER_RECV_EVENT): - return("GM_PEER_RECV_EVENT"); - case(GM_HIGH_PEER_RECV_EVENT): - return("GM_HIGH_PEER_RECV_EVENT"); - case(GM_FAST_RECV_EVENT): - return("GM_FAST_RECV_EVENT"); - case(GM_FAST_HIGH_RECV_EVENT): - return("GM_FAST_HIGH_RECV_EVENT"); - case(GM_FAST_PEER_RECV_EVENT): - return("GM_FAST_PEER_RECV_EVENT"); - case(GM_FAST_HIGH_PEER_RECV_EVENT): - return("GM_FAST_HIGH_PEER_RECV_EVENT"); - case(GM_REJECTED_SEND_EVENT): - return("GM_REJECTED_SEND_EVENT"); - case(GM_ORPHANED_SEND_EVENT): - return("GM_ORPHANED_SEND_EVENT"); - case(GM_BAD_RESEND_DETECTED_EVENT): - return("GM_BAD_RESEND_DETETED_EVENT"); - case(GM_DROPPED_SEND_EVENT): - return("GM_DROPPED_SEND_EVENT"); - case(GM_BAD_SEND_VMA_EVENT): - return("GM_BAD_SEND_VMA_EVENT"); - case(GM_BAD_RECV_VMA_EVENT): - return("GM_BAD_RECV_VMA_EVENT"); - case(_GM_FLUSHED_ALARM_EVENT): - return("GM_FLUSHED_ALARM_EVENT"); - case(GM_SENT_TOKENS_EVENT): - return("GM_SENT_TOKENS_EVENTS"); - case(GM_IGNORE_RECV_EVENT): - return("GM_IGNORE_RECV_EVENT"); - 
case(GM_ETHERNET_RECV_EVENT): - return("GM_ETHERNET_RECV_EVENT"); - case(GM_NEW_NO_RECV_EVENT): - return("GM_NEW_NO_RECV_EVENT"); - case(GM_NEW_SENDS_FAILED_EVENT): - return("GM_NEW_SENDS_FAILED_EVENT"); - case(GM_NEW_ALARM_EVENT): - return("GM_NEW_ALARM_EVENT"); - case(GM_NEW_SENT_EVENT): - return("GM_NEW_SENT_EVENT"); - case(_GM_NEW_SLEEP_EVENT): - return("GM_NEW_SLEEP_EVENT"); - case(GM_NEW_RAW_RECV_EVENT): - return("GM_NEW_RAW_RECV_EVENT"); - case(GM_NEW_BAD_SEND_DETECTED_EVENT): - return("GM_NEW_BAD_SEND_DETECTED_EVENT"); - case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT): - return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT"); - case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT): - return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT"); - case(GM_NEW_BAD_RECV_TOKEN_EVENT): - return("GM_NEW_BAD_RECV_TOKEN_EVENT"); - case(GM_NEW_ALARM_VIOLATION_EVENT): - return("GM_NEW_ALARM_VIOLATION_EVENT"); - case(GM_NEW_RECV_EVENT): - return("GM_NEW_RECV_EVENT"); - case(GM_NEW_HIGH_RECV_EVENT): - return("GM_NEW_HIGH_RECV_EVENT"); - case(GM_NEW_PEER_RECV_EVENT): - return("GM_NEW_PEER_RECV_EVENT"); - case(GM_NEW_HIGH_PEER_RECV_EVENT): - return("GM_NEW_HIGH_PEER_RECV_EVENT"); - case(GM_NEW_FAST_RECV_EVENT): - return("GM_NEW_FAST_RECV_EVENT"); - case(GM_NEW_FAST_HIGH_RECV_EVENT): - return("GM_NEW_FAST_HIGH_RECV_EVENT"); - case(GM_NEW_FAST_PEER_RECV_EVENT): - return("GM_NEW_FAST_PEER_RECV_EVENT"); - case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT): - return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT"); - case(GM_NEW_REJECTED_SEND_EVENT): - return("GM_NEW_REJECTED_SEND_EVENT"); - case(GM_NEW_ORPHANED_SEND_EVENT): - return("GM_NEW_ORPHANED_SEND_EVENT"); - case(_GM_NEW_PUT_NOTIFICATION_EVENT): - return("_GM_NEW_PUT_NOTIFICATION_EVENT"); - case(GM_NEW_FREE_SEND_TOKEN_EVENT): - return("GM_NEW_FREE_SEND_TOKEN_EVENT"); - case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT): - return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT"); - case(GM_NEW_BAD_RESEND_DETECTED_EVENT): - return("GM_NEW_BAD_RESEND_DETECTED_EVENT"); - case(GM_NEW_DROPPED_SEND_EVENT): - 
return("GM_NEW_DROPPED_SEND_EVENT"); - case(GM_NEW_BAD_SEND_VMA_EVENT): - return("GM_NEW_BAD_SEND_VMA_EVENT"); - case(GM_NEW_BAD_RECV_VMA_EVENT): - return("GM_NEW_BAD_RECV_VMA_EVENT"); - case(_GM_NEW_FLUSHED_ALARM_EVENT): - return("GM_NEW_FLUSHED_ALARM_EVENT"); - case(GM_NEW_SENT_TOKENS_EVENT): - return("GM_NEW_SENT_TOKENS_EVENT"); - case(GM_NEW_IGNORE_RECV_EVENT): - return("GM_NEW_IGNORE_RECV_EVENT"); - case(GM_NEW_ETHERNET_RECV_EVENT): - return("GM_NEW_ETHERNET_RECV_EVENT"); - default: - return("Unknown Recv event"); - /* _GM_PUT_NOTIFICATION_EVENT */ - /* GM_FREE_SEND_TOKEN_EVENT */ - /* GM_FREE_HIGH_SEND_TOKEN_EVENT */ - } -} - - -void -gmnal_yield(int delay) -{ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(delay); -} diff --git a/lnet/klnds/iiblnd/.cvsignore b/lnet/klnds/iiblnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/iiblnd/Makefile.in b/lnet/klnds/iiblnd/Makefile.in deleted file mode 100644 index 7ee9b6444ab92fa63b1edaa50d5c7c58dfd1c93c..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kiiblnd -kiiblnd-objs := iiblnd.o iiblnd_cb.o iiblnd_modparams.o - -EXTRA_POST_CFLAGS := @IIBCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/iiblnd/autoMakefile.am b/lnet/klnds/iiblnd/autoMakefile.am deleted file mode 100644 index d08d07973f1ae42294ad601e43b046ed3073d447..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_IIBLND -modulenet_DATA = kiiblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kiiblnd-objs:%.o=%.c) iiblnd.h diff --git a/lnet/klnds/iiblnd/iiblnd.c b/lnet/klnds/iiblnd/iiblnd.c deleted file mode 100644 index 27b31a5e5d0d8932defc137607bc0863fa5408d3..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/iiblnd.c +++ /dev/null @@ -1,2150 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "iiblnd.h" - -lnd_t the_kiblnd = { - .lnd_type = IIBLND, - .lnd_startup = kibnal_startup, - .lnd_shutdown = kibnal_shutdown, - .lnd_ctl = kibnal_ctl, - .lnd_send = kibnal_send, - .lnd_recv = kibnal_recv, - .lnd_eager_recv = kibnal_eager_recv, -}; - -kib_data_t kibnal_data; - -__u32 -kibnal_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -void -kibnal_init_msg(kib_msg_t *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob; -} - -void -kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, - lnet_nid_t dstnid, __u64 dststamp, __u64 seq) -{ - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ - msg->ibm_magic = IBNAL_MSG_MAGIC; - msg->ibm_version = version; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid, - dstnid); - msg->ibm_srcstamp = kibnal_data.kib_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - msg->ibm_seq = seq; - - if (*kibnal_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob); - } -} - -void -kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob, - int type, lnet_nid_t dstnid, __u64 dststamp) -{ - LASSERT (nob >= offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)); - - memset(msg, 0, nob); - kibnal_init_msg(msg, type, sizeof(kib_connparams_t)); - - msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - msg->ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE; - msg->ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS; - - kibnal_pack_msg(msg, version, 0, dstnid, dststamp, 0); -} - -int -kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) -{ - const int hdr_size = offsetof(kib_msg_t, ibm_u); - __u32 msg_cksum; - __u32 msg_version; - int flip; - int msg_nob; -#if !IBNAL_USE_FMR - int i; - int n; -#endif - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - /* Future protocol version compatibility support! 
- * If the iiblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will negotiate a - * protocol version. If I find this, I avoid any console errors. If - * my is doing connection establishment, the reject will tell the peer - * which version I'm running. */ - - if (msg->ibm_magic == IBNAL_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) { - flip = 1; - } else { - if (msg->ibm_magic == LNET_PROTO_MAGIC || - msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) - return -EPROTO; - - /* Completely out to lunch */ - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version; - if (expected_version == 0) { - if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD && - msg_version != IBNAL_MSG_VERSION) - return -EPROTO; - } else if (msg_version != expected_version) { - CERROR("Bad version: %x(%x expected)\n", - msg_version, expected_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? 
__swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kibnal_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - msg->ibm_version = msg_version; - CLASSERT (sizeof(msg->ibm_type) == 1); - CLASSERT (sizeof(msg->ibm_credits) == 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - __swab64s(&msg->ibm_seq); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_IMMEDIATE: - if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) { - CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) { - CERROR("Short PUT_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putreq))); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_ACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putack))); - return -EPROTO; - } -#if IBNAL_USE_FMR - if (flip) { - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag); - } - - n = msg->ibm_u.putack.ibpam_rd.rd_nfrag; - if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", - n, 
IBNAL_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) { - for (i = 0; i < n; i++) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob); - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr); - } - } -#endif - break; - - case IBNAL_MSG_GET_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.get))); - return -EPROTO; - } -#if IBNAL_USE_FMR - if (flip) { - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag); - } - - n = msg->ibm_u.get.ibgm_rd.rd_nfrag; - if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", - n, IBNAL_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) - for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob); - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr); - } -#endif - break; - - case IBNAL_MSG_PUT_NAK: - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) { - CERROR("Short RDMA completion: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.completion))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; - - case IBNAL_MSG_CONNREQ: - case IBNAL_MSG_CONNACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) { - CERROR("Short 
connreq/ack: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.connparams))); - return -EPROTO; - } - if (flip) { - __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth); - __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); - __swab32s(&msg->ibm_u.connparams.ibcp_max_frags); - } - break; - } - return 0; -} - -IB_HANDLE -kibnal_create_cep(lnet_nid_t nid) -{ - FSTATUS frc; - __u32 u32val; - IB_HANDLE cep; - - cep = iba_cm_create_cep(CM_RC_TYPE); - if (cep == NULL) { - CERROR ("Can't create CEP for %s\n", - (nid == LNET_NID_ANY) ? "listener" : - libcfs_nid2str(nid)); - return NULL; - } - - if (nid == LNET_NID_ANY) { - u32val = 1; - frc = iba_cm_modify_cep(cep, CM_FLAG_ASYNC_ACCEPT, - (char *)&u32val, sizeof(u32val), 0); - if (frc != FSUCCESS) { - CERROR("Can't set async_accept: %d\n", frc); - goto failed; - } - - u32val = 0; /* sets system max */ - frc = iba_cm_modify_cep(cep, CM_FLAG_LISTEN_BACKLOG, - (char *)&u32val, sizeof(u32val), 0); - if (frc != FSUCCESS) { - CERROR("Can't set listen backlog: %d\n", frc); - goto failed; - } - } - - u32val = 1; - frc = iba_cm_modify_cep(cep, CM_FLAG_TIMEWAIT_CALLBACK, - (char *)&u32val, sizeof(u32val), 0); - if (frc != FSUCCESS) { - CERROR("Can't set timewait_callback for %s: %d\n", - (nid == LNET_NID_ANY) ? 
"listener" : - libcfs_nid2str(nid), frc); - goto failed; - } - - return cep; - - failed: - iba_cm_destroy_cep(cep); - return NULL; -} - -#define IBNAL_CHECK_ADVERT 1 -#if IBNAL_CHECK_ADVERT -void -kibnal_service_query_done (void *arg, QUERY *qry, - QUERY_RESULT_VALUES *qry_result) -{ - int *rcp = arg; - FSTATUS frc = qry_result->Status; - SERVICE_RECORD_RESULTS *svc_rslt; - IB_SERVICE_RECORD *svc; - lnet_nid_t nid; - - if (frc != FSUCCESS || qry_result->ResultDataSize == 0) { - CERROR("Error checking advert: status %d data size %d\n", - frc, qry_result->ResultDataSize); - *rcp = -EIO; - goto out; - } - - svc_rslt = (SERVICE_RECORD_RESULTS *)qry_result->QueryResult; - - if (svc_rslt->NumServiceRecords < 1) { - CERROR("Check advert: %d records\n", - svc_rslt->NumServiceRecords); - *rcp = -ENOENT; - goto out; - } - - svc = &svc_rslt->ServiceRecords[0]; - nid = le64_to_cpu(*kibnal_service_nid_field(svc)); - - CDEBUG(D_NET, "Check advert: %s "LPX64" "LPX64":%04x\n", - libcfs_nid2str(nid), svc->RID.ServiceID, - svc->RID.ServiceGID.Type.Global.InterfaceID, - svc->RID.ServiceP_Key); - - if (nid != kibnal_data.kib_ni->ni_nid) { - CERROR("Check advert: Bad NID %s (%s expected)\n", - libcfs_nid2str(nid), - libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - *rcp = -EINVAL; - goto out; - } - - if (svc->RID.ServiceID != *kibnal_tunables.kib_service_number) { - CERROR("Check advert: Bad ServiceID "LPX64" (%x expected)\n", - svc->RID.ServiceID, - *kibnal_tunables.kib_service_number); - *rcp = -EINVAL; - goto out; - } - - if (svc->RID.ServiceGID.Type.Global.InterfaceID != - kibnal_data.kib_port_guid) { - CERROR("Check advert: Bad GUID "LPX64" ("LPX64" expected)\n", - svc->RID.ServiceGID.Type.Global.InterfaceID, - kibnal_data.kib_port_guid); - *rcp = -EINVAL; - goto out; - } - - if (svc->RID.ServiceP_Key != kibnal_data.kib_port_pkey) { - CERROR("Check advert: Bad PKEY %04x (%04x expected)\n", - svc->RID.ServiceP_Key, kibnal_data.kib_port_pkey); - *rcp = -EINVAL; - goto out; - } - - 
CDEBUG(D_NET, "Check advert OK\n"); - *rcp = 0; - - out: - up (&kibnal_data.kib_listener_signal); -} - -int -kibnal_check_advert (void) -{ - /* single-threaded */ - static QUERY qry; - - FSTATUS frc; - int rc; - - memset (&qry, 0, sizeof(qry)); - qry.InputType = InputTypeServiceRecord; - qry.OutputType = OutputTypeServiceRecord; - kibnal_set_service_keys(&qry.InputValue.ServiceRecordValue.ServiceRecord, - kibnal_data.kib_ni->ni_nid); - qry.InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK; - - frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - &qry, - kibnal_service_query_done, - &kibnal_data.kib_sdretry, - &rc); - if (frc != FPENDING) { - CERROR ("Immediate error %d checking SM service\n", frc); - return -EIO; - } - - down (&kibnal_data.kib_listener_signal); - - if (rc != 0) - CERROR ("Error %d checking SM service\n", rc); - return rc; -} -#else -int -kibnal_check_advert(void) -{ - return 0; -} -#endif - -void -kibnal_fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type) -{ - IB_SERVICE_RECORD *svc; - - memset (fod, 0, sizeof(*fod)); - fod->Type = type; - - svc = &fod->Value.ServiceRecordValue.ServiceRecord; - svc->RID.ServiceID = *kibnal_tunables.kib_service_number; - svc->RID.ServiceGID.Type.Global.InterfaceID = kibnal_data.kib_port_guid; - svc->RID.ServiceGID.Type.Global.SubnetPrefix = DEFAULT_SUBNET_PREFIX; - svc->RID.ServiceP_Key = kibnal_data.kib_port_pkey; - svc->ServiceLease = 0xffffffff; - - kibnal_set_service_keys(svc, kibnal_data.kib_ni->ni_nid); -} - -void -kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod, - FSTATUS frc, uint32 madrc) -{ - *(FSTATUS *)arg = frc; - up (&kibnal_data.kib_listener_signal); -} - -int -kibnal_advertise (void) -{ - /* Single threaded here */ - static FABRIC_OPERATION_DATA fod; - - IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord; - FSTATUS frc; - FSTATUS frc2; - - if (strlen(*kibnal_tunables.kib_service_name) >= - 
sizeof(svc->ServiceName)) { - CERROR("Service name '%s' too long (%d chars max)\n", - *kibnal_tunables.kib_service_name, - (int)sizeof(svc->ServiceName) - 1); - return -EINVAL; - } - - kibnal_fill_fod(&fod, FabOpSetServiceRecord); - - CDEBUG(D_NET, "Advertising service id "LPX64" %s:%s\n", - svc->RID.ServiceID, svc->ServiceName, - libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc)))); - - frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - &fod, - kibnal_service_setunset_done, - &kibnal_data.kib_sdretry, - &frc2); - - if (frc != FSUCCESS && frc != FPENDING) { - CERROR ("Immediate error %d advertising NID %s\n", - frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - return -EIO; - } - - down (&kibnal_data.kib_listener_signal); - - frc = frc2; - if (frc == FSUCCESS) - return 0; - - CERROR ("Error %d advertising %s\n", - frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - return -EIO; -} - -void -kibnal_unadvertise (int expect_success) -{ - /* single threaded */ - static FABRIC_OPERATION_DATA fod; - - IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord; - FSTATUS frc; - FSTATUS frc2; - - LASSERT (kibnal_data.kib_ni->ni_nid != LNET_NID_ANY); - - kibnal_fill_fod(&fod, FabOpDeleteServiceRecord); - - CDEBUG(D_NET, "Unadvertising service %s:%s\n", - svc->ServiceName, - libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc)))); - - frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - &fod, - kibnal_service_setunset_done, - &kibnal_data.kib_sdretry, - &frc2); - if (frc != FSUCCESS && frc != FPENDING) { - CERROR ("Immediate error %d unadvertising NID %s\n", - frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - return; - } - - down (&kibnal_data.kib_listener_signal); - - CDEBUG(D_NET, "Unadvertise rc: %d\n", frc2); - - if ((frc2 == FSUCCESS) == !!expect_success) - return; - - if (expect_success) - CERROR("Error %d unadvertising NID %s\n", - frc2, 
libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - else - CWARN("Removed conflicting NID %s\n", - libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); -} - -void -kibnal_stop_listener(int normal_shutdown) -{ - /* NB this also disables peer creation and destroys all existing - * peers */ - IB_HANDLE cep = kibnal_data.kib_listener_cep; - unsigned long flags; - FSTATUS frc; - - LASSERT (cep != NULL); - - kibnal_unadvertise(normal_shutdown); - - frc = iba_cm_cancel(cep); - if (frc != FSUCCESS && frc != FPENDING) - CERROR ("Error %d stopping listener\n", frc); - - down(&kibnal_data.kib_listener_signal); - - frc = iba_cm_destroy_cep(cep); - if (frc != FSUCCESS) - CERROR ("Error %d destroying listener CEP\n", frc); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - /* This assignment disables peer creation */ - kibnal_data.kib_listener_cep = NULL; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - /* Start to tear down any peers created while the listener was - * running */ - kibnal_del_peer(LNET_NID_ANY); -} - -int -kibnal_start_listener(void) -{ - /* NB this also enables peer creation */ - - IB_HANDLE cep; - CM_LISTEN_INFO info; - unsigned long flags; - int rc; - FSTATUS frc; - - LASSERT (kibnal_data.kib_listener_cep == NULL); - init_MUTEX_LOCKED (&kibnal_data.kib_listener_signal); - - cep = kibnal_create_cep(LNET_NID_ANY); - if (cep == NULL) - return -ENOMEM; - - memset (&info, 0, sizeof(info)); - info.ListenAddr.EndPt.SID = *kibnal_tunables.kib_service_number; - - frc = iba_cm_listen(cep, &info, kibnal_listen_callback, NULL); - if (frc != FSUCCESS && frc != FPENDING) { - CERROR ("iba_cm_listen error: %d\n", frc); - - iba_cm_destroy_cep(cep); - return -EIO; - } - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - /* This assignment enables peer creation */ - kibnal_data.kib_listener_cep = cep; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - rc = kibnal_advertise(); - if (rc == 0) - rc = kibnal_check_advert(); - - if (rc == 
0) - return 0; - - kibnal_stop_listener(0); - return rc; -} - -int -kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid) -{ - kib_peer_t *peer; - unsigned long flags; - int rc; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC (peer, sizeof (*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->ibp_nid = nid; - atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD (&peer->ibp_conns); - INIT_LIST_HEAD (&peer->ibp_tx_queue); - - peer->ibp_error = 0; - peer->ibp_last_alive = cfs_time_current(); - peer->ibp_reconnect_interval = 0; /* OK to connect at any time */ - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (atomic_read(&kibnal_data.kib_npeers) >= - *kibnal_tunables.kib_concurrent_peers) { - rc = -EOVERFLOW; /* !! but at least it distinguishes */ - } else if (kibnal_data.kib_listener_cep == NULL) { - rc = -ESHUTDOWN; /* shutdown has started */ - } else { - rc = 0; - /* npeers only grows with the global lock held */ - atomic_inc(&kibnal_data.kib_npeers); - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (rc != 0) { - CERROR("Can't create peer: %s\n", - (rc == -ESHUTDOWN) ? 
"shutting down" : - "too many peers"); - LIBCFS_FREE(peer, sizeof(*peer)); - } else { - *peerp = peer; - } - - return rc; -} - -void -kibnal_destroy_peer (kib_peer_t *peer) -{ - - LASSERT (atomic_read (&peer->ibp_refcount) == 0); - LASSERT (peer->ibp_persistence == 0); - LASSERT (!kibnal_peer_active(peer)); - LASSERT (!kibnal_peer_connecting(peer)); - LASSERT (list_empty (&peer->ibp_conns)); - LASSERT (list_empty (&peer->ibp_tx_queue)); - - LIBCFS_FREE (peer, sizeof (*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. */ - atomic_dec (&kibnal_data.kib_npeers); -} - -/* the caller is responsible for accounting for the additional reference - * that this creates */ -kib_peer_t * -kibnal_find_peer_locked (lnet_nid_t nid) -{ - struct list_head *peer_list = kibnal_nid2peerlist (nid); - struct list_head *tmp; - kib_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, kib_peer_t, ibp_list); - - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - if (peer->ibp_nid != nid) - continue; - - CDEBUG(D_NET, "got peer %s (%d)\n", - libcfs_nid2str(nid), atomic_read (&peer->ibp_refcount)); - return (peer); - } - return (NULL); -} - -void -kibnal_unlink_peer_locked (kib_peer_t *peer) -{ - LASSERT (peer->ibp_persistence == 0); - LASSERT (list_empty(&peer->ibp_conns)); - - LASSERT (kibnal_peer_active(peer)); - list_del_init (&peer->ibp_list); - /* lose peerlist's ref */ - kibnal_peer_decref(peer); -} - -int -kibnal_get_peer_info (int index, lnet_nid_t *nidp, int *persistencep) -{ - kib_peer_t *peer; - struct list_head *ptmp; - unsigned long flags; - int i; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer 
= list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *persistencep = peer->ibp_persistence; - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (0); - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (-ENOENT); -} - -int -kibnal_add_persistent_peer (lnet_nid_t nid) -{ - unsigned long flags; - kib_peer_t *peer; - kib_peer_t *peer2; - int rc; - - if (nid == LNET_NID_ANY) - return (-EINVAL); - - rc = kibnal_create_peer(&peer, nid); - if (rc != 0) - return rc; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - peer2 = kibnal_find_peer_locked (nid); - if (peer2 != NULL) { - kibnal_peer_decref (peer); - peer = peer2; - } else { - /* peer table takes existing ref on peer */ - list_add_tail (&peer->ibp_list, - kibnal_nid2peerlist (nid)); - } - - peer->ibp_persistence++; - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return (0); -} - -void -kibnal_del_peer_locked (kib_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kib_conn_t *conn; - - peer->ibp_persistence = 0; - - if (list_empty(&peer->ibp_conns)) { - kibnal_unlink_peer_locked(peer); - } else { - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - kibnal_close_conn_locked (conn, 0); - } - /* NB peer is no longer persistent; closing its last conn - * unlinked it. */ - } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. 
*/ -} - -int -kibnal_del_peer (lnet_nid_t nid) -{ - unsigned long flags; - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - kib_peer_t *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT (list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, &zombies); - } - - kibnal_del_peer_locked (peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_txlist_done(&zombies, -EIO); - - return (rc); -} - -kib_conn_t * -kibnal_get_conn_by_idx (int index) -{ - kib_peer_t *peer; - struct list_head *ptmp; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - int i; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - list_for_each (ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry (ctmp, kib_conn_t, ibc_list); - kibnal_conn_addref(conn); - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (conn); - } - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (NULL); -} - -int -kibnal_conn_rts(kib_conn_t *conn, 
- __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn) -{ - IB_PATH_RECORD *path = &conn->ibc_cvars->cv_path; - IB_HANDLE qp = conn->ibc_qp; - IB_QP_ATTRIBUTES_MODIFY modify_attr; - FSTATUS frc; - int rc; - - if (resp_res > kibnal_data.kib_hca_attrs.MaxQPResponderResources) - resp_res = kibnal_data.kib_hca_attrs.MaxQPResponderResources; - - if (init_depth > kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth) - init_depth = kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth; - - modify_attr = (IB_QP_ATTRIBUTES_MODIFY) { - .RequestState = QPStateReadyToRecv, - .RecvPSN = IBNAL_STARTING_PSN, - .DestQPNumber = qpn, - .ResponderResources = resp_res, - .MinRnrTimer = UsecToRnrNakTimer(2000), /* 20 ms */ - .Attrs = (IB_QP_ATTR_RECVPSN | - IB_QP_ATTR_DESTQPNUMBER | - IB_QP_ATTR_RESPONDERRESOURCES | - IB_QP_ATTR_DESTAV | - IB_QP_ATTR_PATHMTU | - IB_QP_ATTR_MINRNRTIMER), - }; - GetAVFromPath(0, path, &modify_attr.PathMTU, NULL, - &modify_attr.DestAV); - - frc = iba_modify_qp(qp, &modify_attr, NULL); - if (frc != FSUCCESS) { - CERROR("Can't set QP %s ready to receive: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - return -EIO; - } - - rc = kibnal_post_receives(conn); - if (rc != 0) { - CERROR("Can't post receives for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - return rc; - } - - modify_attr = (IB_QP_ATTRIBUTES_MODIFY) { - .RequestState = QPStateReadyToSend, - .FlowControl = TRUE, - .InitiatorDepth = init_depth, - .SendPSN = psn, - .LocalAckTimeout = path->PktLifeTime + 2, /* 2 or 1? 
*/ - .RetryCount = IBNAL_RETRY, - .RnrRetryCount = IBNAL_RNR_RETRY, - .Attrs = (IB_QP_ATTR_FLOWCONTROL | - IB_QP_ATTR_INITIATORDEPTH | - IB_QP_ATTR_SENDPSN | - IB_QP_ATTR_LOCALACKTIMEOUT | - IB_QP_ATTR_RETRYCOUNT | - IB_QP_ATTR_RNRRETRYCOUNT), - }; - - frc = iba_modify_qp(qp, &modify_attr, NULL); - if (frc != FSUCCESS) { - CERROR("Can't set QP %s ready to send: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - return -EIO; - } - - frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL); - if (frc != FSUCCESS) { - CERROR ("Can't query QP %s attributes: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - return -EIO; - } - - return 0; -} - -kib_conn_t * -kibnal_create_conn (lnet_nid_t nid, int proto_version) -{ - kib_conn_t *conn; - int i; - int page_offset; - int ipage; - int rc; - FSTATUS frc; - union { - IB_QP_ATTRIBUTES_CREATE qp_create; - IB_QP_ATTRIBUTES_MODIFY qp_attr; - } params; - - LIBCFS_ALLOC (conn, sizeof (*conn)); - if (conn == NULL) { - CERROR ("Can't allocate connection for %s\n", - libcfs_nid2str(nid)); - return (NULL); - } - - /* zero flags, NULL pointers etc... 
*/ - memset (conn, 0, sizeof (*conn)); - conn->ibc_state = IBNAL_CONN_INIT_NOTHING; - conn->ibc_version = proto_version; - - INIT_LIST_HEAD (&conn->ibc_early_rxs); - INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD (&conn->ibc_tx_queue); - INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD (&conn->ibc_active_txs); - spin_lock_init (&conn->ibc_lock); - - atomic_inc (&kibnal_data.kib_nconns); - /* well not really, but I call destroy() on failure, which decrements */ - - LIBCFS_ALLOC(conn->ibc_cvars, sizeof (*conn->ibc_cvars)); - if (conn->ibc_cvars == NULL) { - CERROR ("Can't allocate connvars for %s\n", - libcfs_nid2str(nid)); - goto failed; - } - memset(conn->ibc_cvars, 0, sizeof (*conn->ibc_cvars)); - - LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t)); - if (conn->ibc_rxs == NULL) { - CERROR("Cannot allocate RX descriptors for %s\n", - libcfs_nid2str(nid)); - goto failed; - } - memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES); - if (rc != 0) { - CERROR("Can't allocate RX buffers for %s\n", - libcfs_nid2str(nid)); - goto failed; - } - - for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; - kib_rx_t *rx = &conn->ibc_rxs[i]; - - rx->rx_conn = conn; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - rx->rx_hca_msg = kibnal_data.kib_whole_mem.md_addr + - lnet_page2phys(page) + page_offset; - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_RX_MSG_PAGES); - } - } - - params.qp_create = (IB_QP_ATTRIBUTES_CREATE) { - .Type = QPTypeReliableConnected, - .SendQDepth = (1 + IBNAL_MAX_RDMA_FRAGS) * - (*kibnal_tunables.kib_concurrent_sends), - .RecvQDepth = IBNAL_RX_MSGS, - .SendDSListDepth = 1, - .RecvDSListDepth = 1, - .SendCQHandle = 
kibnal_data.kib_cq, - .RecvCQHandle = kibnal_data.kib_cq, - .PDHandle = kibnal_data.kib_pd, - .SendSignaledCompletions = TRUE, - }; - frc = iba_create_qp(kibnal_data.kib_hca, ¶ms.qp_create, NULL, - &conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs); - if (frc != 0) { - CERROR ("Can't create QP %s: %d\n", libcfs_nid2str(nid), frc); - goto failed; - } - - /* Mark QP created */ - kibnal_set_conn_state(conn, IBNAL_CONN_INIT_QP); - - params.qp_attr = (IB_QP_ATTRIBUTES_MODIFY) { - .RequestState = QPStateInit, - .Attrs = (IB_QP_ATTR_PORTGUID | - IB_QP_ATTR_PKEYINDEX | - IB_QP_ATTR_ACCESSCONTROL), - .PortGUID = kibnal_data.kib_port_guid, - .PkeyIndex = 0, - .AccessControl = { - .s = { - .RdmaWrite = 1, - .RdmaRead = 1, - }, - }, - }; - frc = iba_modify_qp(conn->ibc_qp, ¶ms.qp_attr, NULL); - if (frc != 0) { - CERROR ("Can't set QP %s state to INIT: %d\n", - libcfs_nid2str(nid), frc); - goto failed; - } - - frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL); - if (frc != FSUCCESS) { - CERROR ("Can't query QP %s attributes: %d\n", - libcfs_nid2str(nid), frc); - goto failed; - } - - /* 1 ref for caller */ - atomic_set (&conn->ibc_refcount, 1); - CDEBUG(D_NET, "New conn %p\n", conn); - return (conn); - - failed: - kibnal_destroy_conn (conn); - return (NULL); -} - -void -kibnal_destroy_conn (kib_conn_t *conn) -{ - FSTATUS frc; - - LASSERT (!in_interrupt()); - - CDEBUG (D_NET, "connection %s\n", - (conn->ibc_peer) == NULL ? 
"<ANON>" : - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - LASSERT (atomic_read (&conn->ibc_refcount) == 0); - LASSERT (list_empty(&conn->ibc_early_rxs)); - LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT (list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT (list_empty(&conn->ibc_active_txs)); - LASSERT (conn->ibc_nsends_posted == 0); - - switch (conn->ibc_state) { - case IBNAL_CONN_INIT_NOTHING: - case IBNAL_CONN_INIT_QP: - case IBNAL_CONN_DISCONNECTED: - break; - - default: - /* conn must either have never engaged with the CM, or have - * completely disengaged from it */ - CERROR("Bad conn %s state %d\n", - (conn->ibc_peer) == NULL ? "<anon>" : - libcfs_nid2str(conn->ibc_peer->ibp_nid), conn->ibc_state); - LBUG(); - } - - if (conn->ibc_cep != NULL) { - frc = iba_cm_destroy_cep(conn->ibc_cep); - if (frc != FSUCCESS) - CERROR("Error destroying CEP %p: %d\n", - conn->ibc_cep, frc); - } - - if (conn->ibc_qp != NULL) { - frc = iba_destroy_qp(conn->ibc_qp); - if (frc != FSUCCESS) - CERROR("Error destroying QP %p: %d\n", - conn->ibc_qp, frc); - } - - if (conn->ibc_rx_pages != NULL) - kibnal_free_pages(conn->ibc_rx_pages); - - if (conn->ibc_rxs != NULL) - LIBCFS_FREE(conn->ibc_rxs, - IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - if (conn->ibc_cvars != NULL) - LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars)); - - if (conn->ibc_peer != NULL) - kibnal_peer_decref(conn->ibc_peer); - - LIBCFS_FREE(conn, sizeof (*conn)); - - atomic_dec(&kibnal_data.kib_nconns); -} - -int -kibnal_close_peer_conns_locked (kib_peer_t *peer, int why) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - count++; - kibnal_close_conn_locked (conn, why); - } - - return (count); -} - -int -kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) -{ - kib_conn_t *conn; - struct list_head 
*ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - if (conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn nid:%s incarnation:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_incarnation, incarnation); - - count++; - kibnal_close_conn_locked (conn, -ESTALE); - } - - return (count); -} - -int -kibnal_close_matching_conns (lnet_nid_t nid) -{ - unsigned long flags; - kib_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - kibnal_peer_connecting(peer) || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kibnal_close_peer_conns_locked (peer, 0); - } - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return (0); - - return (count == 0 ? 
-ENOENT : 0); -} - -int -kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - ENTRY; - - LASSERT (ni == kibnal_data.kib_ni); - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - int share_count = 0; - - rc = kibnal_get_peer_info(data->ioc_count, - &nid, &share_count); - data->ioc_nid = nid; - data->ioc_count = share_count; - break; - } - case IOC_LIBCFS_ADD_PEER: { - rc = kibnal_add_persistent_peer (data->ioc_nid); - break; - } - case IOC_LIBCFS_DEL_PEER: { - rc = kibnal_del_peer (data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count); - - if (conn == NULL) - rc = -ENOENT; - else { - rc = 0; - data->ioc_nid = conn->ibc_peer->ibp_nid; - kibnal_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kibnal_close_matching_conns (data->ioc_nid); - break; - } - case IOC_LIBCFS_REGISTER_MYNID: { - if (ni->ni_nid == data->ioc_nid) { - rc = 0; - } else { - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - rc = -EINVAL; - } - break; - } - } - - RETURN(rc); -} - -void -kibnal_free_pages (kib_pages_t *p) -{ - int npages = p->ibp_npages; - int i; - - for (i = 0; i < npages; i++) - if (p->ibp_pages[i] != NULL) - __free_page(p->ibp_pages[i]); - - LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); -} - -int -kibnal_alloc_pages (kib_pages_t **pp, int npages) -{ - kib_pages_t *p; - int i; - - LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages])); - if (p == NULL) { - CERROR ("Can't allocate buffer %d\n", npages); - return (-ENOMEM); - } - - memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages])); - p->ibp_npages = npages; - - for (i = 0; i < npages; i++) { - p->ibp_pages[i] = alloc_page (GFP_KERNEL); - if (p->ibp_pages[i] == NULL) { - CERROR ("Can't allocate page %d of %d\n", i, npages); - kibnal_free_pages(p); - return 
(-ENOMEM); - } - } - - *pp = p; - return (0); -} - -int -kibnal_alloc_tx_descs (void) -{ - int i; - - LIBCFS_ALLOC (kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - if (kibnal_data.kib_tx_descs == NULL) - return -ENOMEM; - - memset(kibnal_data.kib_tx_descs, 0, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - kib_tx_t *tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); - if (tx->tx_pages == NULL) - return -ENOMEM; -#else - LIBCFS_ALLOC(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - if (tx->tx_wrq == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_gl)); - if (tx->tx_gl == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBNAL_MAX_RDMA_FRAGS])); - if (tx->tx_rd == NULL) - return -ENOMEM; -#endif - } - - return 0; -} - -void -kibnal_free_tx_descs (void) -{ - int i; - - if (kibnal_data.kib_tx_descs == NULL) - return; - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - kib_tx_t *tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - if (tx->tx_pages != NULL) - LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); -#else - if (tx->tx_wrq != NULL) - LIBCFS_FREE(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - - if (tx->tx_gl != NULL) - LIBCFS_FREE(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_gl)); - - if (tx->tx_rd != NULL) - LIBCFS_FREE(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBNAL_MAX_RDMA_FRAGS])); -#endif - } - - LIBCFS_FREE(kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); -} - -int -kibnal_setup_tx_descs (void) -{ - int ipage = 0; - int page_offset = 0; - struct page *page; - kib_tx_t *tx; - int i; - int rc; - - /* pre-mapped messages are not bigger than 1 page */ - CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer 
calculations */ - CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0); - - rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, - IBNAL_TX_MSG_PAGES()); - if (rc != 0) - return (rc); - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - page = kibnal_data.kib_tx_pages->ibp_pages[ipage]; - tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - /* Allocate an FMR for this TX so it can map src/sink buffers - * for large transfers */ -#endif - tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - tx->tx_hca_msg = kibnal_data.kib_whole_mem.md_addr + - lnet_page2phys(page) + page_offset; - - CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n", - i, tx, tx->tx_msg, tx->tx_hca_msg); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_TX_MSG_PAGES()); - } - } - - return (0); -} - -int -kibnal_register_all_memory(void) -{ - /* CAVEAT EMPTOR: this assumes all physical memory is in 1 contiguous - * chunk starting at 0 */ - struct sysinfo si; - __u64 total; - __u64 total2; - __u64 roundup = (128<<20); /* round up in big chunks */ - IB_MR_PHYS_BUFFER phys; - IB_ACCESS_CONTROL access; - FSTATUS frc; - - memset(&access, 0, sizeof(access)); - access.s.MWBindable = 1; - access.s.LocalWrite = 1; - access.s.RdmaRead = 1; - access.s.RdmaWrite = 1; - - /* XXX we don't bother with first-gen cards */ - if (kibnal_data.kib_hca_attrs.VendorId == 0xd0b7 && - kibnal_data.kib_hca_attrs.DeviceId == 0x3101) { - CERROR("Can't register all memory on first generation HCAs\n"); - return -EINVAL; - } - - si_meminfo(&si); - - CDEBUG(D_NET, "si_meminfo: %lu/%u, num_physpages %lu/%lu\n", - si.totalram, si.mem_unit, num_physpages, PAGE_SIZE); - - total = ((__u64)si.totalram) * si.mem_unit; - total2 = num_physpages * PAGE_SIZE; - if (total < total2) - total = total2; - - if (total == 0) { - CERROR("Can't determine memory size\n"); - return 
-ENOMEM; - } - - roundup = (128<<20); - total = (total + (roundup - 1)) & ~(roundup - 1); - - phys.PhysAddr = 0; - phys.Length = total; - - frc = iba_register_contig_pmr(kibnal_data.kib_hca, 0, &phys, 1, 0, - kibnal_data.kib_pd, access, - &kibnal_data.kib_whole_mem.md_handle, - &kibnal_data.kib_whole_mem.md_addr, - &kibnal_data.kib_whole_mem.md_lkey, - &kibnal_data.kib_whole_mem.md_rkey); - - if (frc != FSUCCESS) { - CERROR("registering physical memory failed: %d\n", frc); - return -EIO; - } - - CDEBUG(D_WARNING, "registered phys mem from 0("LPX64") for "LPU64"("LPU64") -> "LPX64"\n", - phys.PhysAddr, total, phys.Length, kibnal_data.kib_whole_mem.md_addr); - - return 0; -} - -void -kibnal_shutdown (lnet_ni_t *ni) -{ - int i; - int rc; - - LASSERT (ni == kibnal_data.kib_ni); - LASSERT (ni->ni_data == &kibnal_data); - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - switch (kibnal_data.kib_init) { - default: - CERROR ("Unexpected state %d\n", kibnal_data.kib_init); - LBUG(); - - case IBNAL_INIT_ALL: - /* stop accepting connections, prevent new peers and start to - * tear down all existing ones... */ - kibnal_stop_listener(1); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read (&kibnal_data.kib_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "waiting for %d peers to disconnect\n", - atomic_read (&kibnal_data.kib_npeers)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - /* fall through */ - - case IBNAL_INIT_CQ: - rc = iba_destroy_cq(kibnal_data.kib_cq); - if (rc != 0) - CERROR ("Destroy CQ error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_TXD: - kibnal_free_pages (kibnal_data.kib_tx_pages); - /* fall through */ - - case IBNAL_INIT_MD: - rc = iba_deregister_mr(kibnal_data.kib_whole_mem.md_handle); - if (rc != FSUCCESS) - CERROR ("Deregister memory: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_PD: - rc = iba_free_pd(kibnal_data.kib_pd); - if (rc != 0) - CERROR ("Destroy PD error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_SD: - rc = iba_sd_deregister(kibnal_data.kib_sd); - if (rc != 0) - CERROR ("Deregister SD error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_PORTATTRS: - LIBCFS_FREE(kibnal_data.kib_hca_attrs.PortAttributesList, - kibnal_data.kib_hca_attrs.PortAttributesListSize); - /* fall through */ - - case IBNAL_INIT_HCA: - rc = iba_close_ca(kibnal_data.kib_hca); - if (rc != 0) - CERROR ("Close HCA error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_DATA: - LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0); - LASSERT (kibnal_data.kib_peers != NULL); - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - LASSERT (list_empty (&kibnal_data.kib_peers[i])); - } - LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0); - LASSERT (list_empty (&kibnal_data.kib_connd_zombies)); - LASSERT (list_empty (&kibnal_data.kib_connd_conns)); - LASSERT (list_empty (&kibnal_data.kib_connd_peers)); - - /* flag threads to terminate; wake and wait for them to die */ - kibnal_data.kib_shutdown = 1; - wake_up_all (&kibnal_data.kib_sched_waitq); - wake_up_all (&kibnal_data.kib_connd_waitq); - - i = 2; - while (atomic_read (&kibnal_data.kib_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "Waiting for %d threads to terminate\n", - atomic_read (&kibnal_data.kib_nthreads)); - set_current_state (TASK_INTERRUPTIBLE); - schedule_timeout (HZ); - } - /* fall through */ - - case IBNAL_INIT_NOTHING: - break; - } - - kibnal_free_tx_descs(); - - if (kibnal_data.kib_peers != NULL) - LIBCFS_FREE (kibnal_data.kib_peers, - sizeof (struct list_head) * - kibnal_data.kib_peer_hash_size); - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - kibnal_data.kib_init = IBNAL_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -int -kibnal_get_ipif_name(char *ifname, int ifname_size, int idx) -{ - char *basename = *kibnal_tunables.kib_ipif_basename; - int n = strlen(basename); - int baseidx; - int m; - - if (n == 0) { /* empty string */ - CERROR("Empty IP interface basename specified\n"); - return -EINVAL; - } - - for (m = n; m > 0; m--) /* find max numeric postfix */ - if (sscanf(basename + m - 1, "%d", &baseidx) != 1) - break; - - if (m == 0) /* just a number */ - m = n; - - if (m == n) /* no postfix */ - baseidx = 1; /* default to 1 */ - - if (m >= ifname_size) - m = ifname_size - 1; - - memcpy(ifname, basename, m); /* copy prefix name */ - - snprintf(ifname + m, ifname_size - m, "%d", baseidx + idx); - - if (strlen(ifname) == ifname_size - 1) { - CERROR("IP interface basename %s too long\n", basename); - return -EINVAL; - } - - return 0; -} - -int -kibnal_startup (lnet_ni_t *ni) -{ - char ipif_name[32]; - __u32 ip; - __u32 netmask; - int up; - int nob; - struct timeval tv; - IB_PORT_ATTRIBUTES *pattr; - FSTATUS frc; - int rc; - __u32 n; - int i; - - LASSERT (ni->ni_lnd == &the_kiblnd); - - /* Only 1 instance supported */ - if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { - CERROR ("Only 1 instance supported\n"); - return -EPERM; - } - - if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) { - CERROR ("Can't set credits(%d) > ntx(%d)\n", - *kibnal_tunables.kib_credits, - *kibnal_tunables.kib_ntx); - return -EINVAL; - } - - 
ni->ni_maxtxcredits = *kibnal_tunables.kib_credits; - ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits; - - CLASSERT (LNET_MAX_INTERFACES > 1); - - if (ni->ni_interfaces[0] == NULL) { - kibnal_data.kib_hca_idx = 0; - } else { - /* Use the HCA specified in 'networks=' */ - if (ni->ni_interfaces[1] != NULL) { - CERROR("Multiple interfaces not supported\n"); - return -EPERM; - } - - /* Parse <number> into kib_hca_idx */ - nob = strlen(ni->ni_interfaces[0]); - if (sscanf(ni->ni_interfaces[0], "%d%n", - &kibnal_data.kib_hca_idx, &nob) < 1 || - nob != strlen(ni->ni_interfaces[0])) { - CERROR("Can't parse interface '%s'\n", - ni->ni_interfaces[0]); - return -EINVAL; - } - } - - rc = kibnal_get_ipif_name(ipif_name, sizeof(ipif_name), - kibnal_data.kib_hca_idx); - if (rc != 0) - return rc; - - rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask); - if (rc != 0) { - CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc); - return -ENETDOWN; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name); - return -ENETDOWN; - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip); - - ni->ni_data = &kibnal_data; - kibnal_data.kib_ni = ni; - - do_gettimeofday(&tv); - kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - PORTAL_MODULE_USE; - - rwlock_init(&kibnal_data.kib_global_lock); - - kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC (kibnal_data.kib_peers, - sizeof (struct list_head) * kibnal_data.kib_peer_hash_size); - if (kibnal_data.kib_peers == NULL) { - goto failed; - } - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kibnal_data.kib_peers[i]); - - spin_lock_init (&kibnal_data.kib_connd_lock); - INIT_LIST_HEAD (&kibnal_data.kib_connd_peers); - INIT_LIST_HEAD (&kibnal_data.kib_connd_conns); - INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies); - init_waitqueue_head (&kibnal_data.kib_connd_waitq); - - spin_lock_init (&kibnal_data.kib_sched_lock); - 
init_waitqueue_head (&kibnal_data.kib_sched_waitq); - - spin_lock_init (&kibnal_data.kib_tx_lock); - INIT_LIST_HEAD (&kibnal_data.kib_idle_txs); - - rc = kibnal_alloc_tx_descs(); - if (rc != 0) { - CERROR("Can't allocate tx descs\n"); - goto failed; - } - - /* lists/ptrs/locks initialised */ - kibnal_data.kib_init = IBNAL_INIT_DATA; - /*****************************************************/ - - kibnal_data.kib_sdretry.RetryCount = *kibnal_tunables.kib_sd_retries; - kibnal_data.kib_sdretry.Timeout = (*kibnal_tunables.kib_timeout * 1000)/ - *kibnal_tunables.kib_sd_retries; - - for (i = 0; i < IBNAL_N_SCHED; i++) { - rc = kibnal_thread_start (kibnal_scheduler, - (void *)(unsigned long)i); - if (rc != 0) { - CERROR("Can't spawn iib scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - rc = kibnal_thread_start (kibnal_connd, NULL); - if (rc != 0) { - CERROR ("Can't spawn iib connd: %d\n", rc); - goto failed; - } - - n = sizeof(kibnal_data.kib_hca_guids) / - sizeof(kibnal_data.kib_hca_guids[0]); - frc = iba_get_caguids(&n, kibnal_data.kib_hca_guids); - if (frc != FSUCCESS) { - CERROR ("Can't get HCA guids: %d\n", frc); - goto failed; - } - - if (n == 0) { - CERROR ("No HCAs found\n"); - goto failed; - } - - if (n <= kibnal_data.kib_hca_idx) { - CERROR("Invalid HCA %d requested: (must be 0 - %d inclusive)\n", - kibnal_data.kib_hca_idx, n - 1); - goto failed; - } - - /* Infinicon has per-HCA notification callbacks */ - frc = iba_open_ca(kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx], - kibnal_hca_callback, - kibnal_hca_async_callback, - NULL, - &kibnal_data.kib_hca); - if (frc != FSUCCESS) { - CERROR ("Can't open HCA[%d]: %d\n", - kibnal_data.kib_hca_idx, frc); - goto failed; - } - - /* Channel Adapter opened */ - kibnal_data.kib_init = IBNAL_INIT_HCA; - /*****************************************************/ - - kibnal_data.kib_hca_attrs.PortAttributesList = NULL; - kibnal_data.kib_hca_attrs.PortAttributesListSize = 0; - frc = iba_query_ca(kibnal_data.kib_hca, - 
&kibnal_data.kib_hca_attrs, NULL); - if (frc != FSUCCESS) { - CERROR ("Can't size port attrs: %d\n", frc); - goto failed; - } - - LIBCFS_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList, - kibnal_data.kib_hca_attrs.PortAttributesListSize); - if (kibnal_data.kib_hca_attrs.PortAttributesList == NULL) - goto failed; - - /* Port attrs allocated */ - kibnal_data.kib_init = IBNAL_INIT_PORTATTRS; - /*****************************************************/ - - frc = iba_query_ca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs, - NULL); - if (frc != FSUCCESS) { - CERROR ("Can't get port attrs for HCA %d: %d\n", - kibnal_data.kib_hca_idx, frc); - goto failed; - } - - for (i = 0, pattr = kibnal_data.kib_hca_attrs.PortAttributesList; - pattr != NULL; - i++, pattr = pattr->Next) { - switch (pattr->PortState) { - default: - CERROR("Unexpected port[%d] state %d\n", - i, pattr->PortState); - continue; - case PortStateDown: - CDEBUG(D_NET, "port[%d] Down\n", i); - continue; - case PortStateInit: - CDEBUG(D_NET, "port[%d] Init\n", i); - continue; - case PortStateArmed: - CDEBUG(D_NET, "port[%d] Armed\n", i); - continue; - - case PortStateActive: - CDEBUG(D_NET, "port[%d] Active\n", i); - kibnal_data.kib_port = i; - kibnal_data.kib_port_guid = pattr->GUID; - kibnal_data.kib_port_pkey = pattr->PkeyTable[0]; - break; - } - break; - } - - if (pattr == NULL) { - CERROR ("Can't find an active port\n"); - goto failed; - } - - CDEBUG(D_NET, "got guid "LPX64"\n", kibnal_data.kib_port_guid); - - frc = iba_sd_register(&kibnal_data.kib_sd, NULL); - if (frc != FSUCCESS) { - CERROR ("Can't register with SD: %d\n", frc); - goto failed; - } - - /* Registered with SD OK */ - kibnal_data.kib_init = IBNAL_INIT_SD; - /*****************************************************/ - - frc = iba_alloc_pd(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd); - if (frc != FSUCCESS) { - CERROR ("Can't create PD: %d\n", rc); - goto failed; - } - - /* flag PD initialised */ - kibnal_data.kib_init = IBNAL_INIT_PD; - 
/*****************************************************/ - - rc = kibnal_register_all_memory(); - if (rc != 0) { - CERROR ("Can't register all memory\n"); - goto failed; - } - - /* flag whole memory MD initialised */ - kibnal_data.kib_init = IBNAL_INIT_MD; - /*****************************************************/ - - rc = kibnal_setup_tx_descs(); - if (rc != 0) { - CERROR ("Can't register tx descs: %d\n", rc); - goto failed; - } - - /* flag TX descs initialised */ - kibnal_data.kib_init = IBNAL_INIT_TXD; - /*****************************************************/ - - frc = iba_create_cq(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(), - &kibnal_data.kib_cq, &kibnal_data.kib_cq, - &n); - if (frc != FSUCCESS) { - CERROR ("Can't create RX CQ: %d\n", frc); - goto failed; - } - - /* flag CQ initialised */ - kibnal_data.kib_init = IBNAL_INIT_CQ; - /*****************************************************/ - - if (n < IBNAL_CQ_ENTRIES()) { - CERROR ("CQ only has %d entries: %d needed\n", - n, IBNAL_CQ_ENTRIES()); - goto failed; - } - - rc = iba_rearm_cq(kibnal_data.kib_cq, CQEventSelNextWC); - if (rc != 0) { - CERROR ("Failed to re-arm completion queue: %d\n", rc); - goto failed; - } - - rc = kibnal_start_listener(); - if (rc != 0) { - CERROR("Can't start listener: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kibnal_data.kib_init = IBNAL_INIT_ALL; - /*****************************************************/ - - return (0); - - failed: - kibnal_shutdown (ni); - return (-ENETDOWN); -} - -void __exit -kibnal_module_fini (void) -{ - lnet_unregister_lnd(&the_kiblnd); - kibnal_tunables_fini(); -} - -int __init -kibnal_module_init (void) -{ - int rc; - - if (the_lnet.ln_ptlcompat != 0) { - LCONSOLE_ERROR("IIB does not support portals compatibility mode\n"); - return -ENODEV; - } - - rc = kibnal_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kiblnd); - - return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel Infinicon IB LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kibnal_module_init); -module_exit(kibnal_module_fini); - diff --git a/lnet/klnds/iiblnd/iiblnd.h b/lnet/klnds/iiblnd/iiblnd.h deleted file mode 100644 index 8b72f24f2264360032f0847921182e6d29d18de4..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/iiblnd.h +++ /dev/null @@ -1,738 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -#include <linux/iba/ibt.h> - -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) - -/* Test for GCC > 3.2.2 */ -#if GCC_VERSION <= 30202 -/* GCC 3.2.2, and presumably several versions before it, will - * miscompile this driver. See - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9853. */ -#error Invalid GCC version. Must use GCC >= 3.2.3 -#endif - -#ifdef CONFIG_SMP -# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define IBNAL_N_SCHED 1 /* # schedulers */ -#endif - -#define IBNAL_USE_FMR 0 /* map on demand v. 
use whole mem mapping */ -#define KIBLND_DETAILED_DEBUG 0 - -/* tunables fixed at compile time */ -#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ -#define IBNAL_CREDIT_HIGHWATER 7 /* when to eagerly return credits */ -#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ -#define IBNAL_RDMA_BASE 0x0eeb0000 -#define IBNAL_STARTING_PSN 1 - -/* QP tunables */ -/* 7 indicates infinite retry attempts, Infinicon recommended 5 */ -#define IBNAL_RETRY 5 /* # times to retry */ -#define IBNAL_RNR_RETRY 5 /* */ -#define IBNAL_CM_RETRY 5 /* # times to retry connection */ -#define IBNAL_FLOW_CONTROL 1 -#define IBNAL_ACK_TIMEOUT 20 /* supposedly 4 secs */ -#define IBNAL_EE_FLOW 1 -#define IBNAL_LOCAL_SUB 1 -#define IBNAL_FAILOVER_ACCEPTED 0 - -/************************/ -/* derived constants... */ - -/* TX messages (shared by all connections) */ -#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx) -#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE) -#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE) - -#if IBNAL_USE_FMR -# define IBNAL_MAX_RDMA_FRAGS 1 -# define IBNAL_CONCURRENT_SENDS IBNAL_RX_MSGS -#else -# define IBNAL_MAX_RDMA_FRAGS LNET_MAX_IOV -# define IBNAL_CONCURRENT_SENDS IBNAL_MSG_QUEUE_SIZE -#endif - -/* RX messages (per connection) */ -#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE * 2) -#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE) -#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) - -#define IBNAL_CQ_ENTRIES() (IBNAL_TX_MSGS() * (1 + IBNAL_MAX_RDMA_FRAGS) + \ - (IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers)) - -typedef struct -{ - char **kib_hca_basename; /* HCA base name */ - char **kib_ipif_basename; /* IPoIB interface base name */ - char **kib_service_name; /* global service name */ - unsigned int *kib_service_number; 
/* global service number */ - int *kib_min_reconnect_interval; /* min connect retry seconds... */ - int *kib_max_reconnect_interval; /* max connect retry seconds */ - int *kib_concurrent_peers; /* max # peers */ - int *kib_cksum; /* checksum kib_msg_t? */ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_keepalive; /* keepalive timeout (seconds) */ - int *kib_ntx; /* # tx descs */ - int *kib_credits; /* # concurrent sends */ - int *kib_peercredits; /* # concurrent sends to 1 peer */ - int *kib_sd_retries; /* # concurrent sends to 1 peer */ - int *kib_concurrent_sends; /* send work queue sizing */ -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *kib_sysctl; /* sysctl interface */ -#endif -} kib_tunables_t; - -/* NB The Infinicon stack has specific typedefs for some things - * (e.g. IB_{L,R}_KEY), that just map back to __u32 etc */ -typedef struct -{ - int ibp_npages; /* # pages */ - struct page *ibp_pages[0]; -} kib_pages_t; - -typedef struct -{ - IB_HANDLE md_handle; - __u32 md_lkey; - __u32 md_rkey; - __u64 md_addr; -} kib_md_t; - -typedef struct -{ - int kib_init; /* initialisation state */ - __u64 kib_incarnation; /* which one am I */ - int kib_shutdown; /* shut down? 
*/ - atomic_t kib_nthreads; /* # live threads */ - lnet_ni_t *kib_ni; /* _the_ iib instance */ - - __u64 kib_port_guid; /* my GUID (lo 64 of GID)*/ - __u16 kib_port_pkey; /* my pkey, whatever that is */ - struct semaphore kib_listener_signal; /* signal completion */ - IB_HANDLE kib_listener_cep; /* connection end point */ - - rwlock_t kib_global_lock; /* stabilize peer/conn ops */ - int kib_ready; /* CQ callback fired */ - int kib_checking_cq; /* a scheduler is checking the CQ */ - - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - atomic_t kib_npeers; /* # peers extant */ - atomic_t kib_nconns; /* # connections extant */ - - struct list_head kib_connd_zombies; /* connections to free */ - struct list_head kib_connd_conns; /* connections to progress */ - struct list_head kib_connd_peers; /* peers waiting for a connection */ - wait_queue_head_t kib_connd_waitq; /* connection daemon sleep here */ - spinlock_t kib_connd_lock; /* serialise */ - - wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ - spinlock_t kib_sched_lock; /* serialise */ - - struct kib_tx *kib_tx_descs; /* all the tx descriptors */ - kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ - - struct list_head kib_idle_txs; /* idle tx descriptors */ - __u64 kib_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t kib_tx_lock; /* serialise */ - - IB_HANDLE kib_hca; /* The HCA */ - int kib_port; /* port on the device */ - IB_HANDLE kib_pd; /* protection domain */ - IB_HANDLE kib_sd; /* SD handle */ - IB_HANDLE kib_cq; /* completion queue */ - kib_md_t kib_whole_mem; /* whole-mem registration */ - - int kib_hca_idx; /* my HCA number */ - uint64 kib_hca_guids[8]; /* all the HCA guids */ - IB_CA_ATTRIBUTES kib_hca_attrs; /* where to get HCA attrs */ - - COMMAND_CONTROL_PARAMETERS kib_sdretry; /* control SD query retries */ -} kib_data_t; - -#define IBNAL_INIT_NOTHING 0 -#define IBNAL_INIT_DATA 1 -#define IBNAL_INIT_LIB 
2 -#define IBNAL_INIT_HCA 3 -#define IBNAL_INIT_PORTATTRS 4 -#define IBNAL_INIT_SD 5 -#define IBNAL_INIT_PD 6 -#define IBNAL_INIT_MD 7 -#define IBNAL_INIT_TXD 8 -#define IBNAL_INIT_CQ 9 -#define IBNAL_INIT_ALL 10 - -/************************************************************************ - * Wire message structs. - * These are sent in sender's byte order (i.e. receiver flips). - * CAVEAT EMPTOR: other structs communicated between nodes (e.g. MAD - * private data and SM service info), is LE on the wire. - */ - -typedef struct kib_connparams -{ - __u32 ibcp_queue_depth; - __u32 ibcp_max_msg_size; - __u32 ibcp_max_frags; -} WIRE_ATTR kib_connparams_t; - -typedef struct -{ - lnet_hdr_t ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kib_immediate_msg_t; - -#if IBNAL_USE_FMR -typedef struct -{ - __u64 rd_addr; /* IO VMA address */ - __u32 rd_nob; /* # of bytes */ - __u32 rd_key; /* remote key */ -} WIRE_ATTR kib_rdma_desc_t; -#else -typedef struct -{ - __u32 rf_nob; /* # of bytes */ - __u64 rf_addr; /* remote io vaddr */ -} WIRE_ATTR kib_rdma_frag_t; - -typedef struct -{ - __u32 rd_key; /* local/remote key */ - __u32 rd_nfrag; /* # fragments */ - kib_rdma_frag_t rd_frags[0]; /* buffer frags */ -} WIRE_ATTR kib_rdma_desc_t; -#endif - -typedef struct -{ - lnet_hdr_t ibprm_hdr; /* LNET header */ - __u64 ibprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kib_putreq_msg_t; - -typedef struct -{ - __u64 ibpam_src_cookie; /* reflected completion cookie */ - __u64 ibpam_dst_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */ -} WIRE_ATTR kib_putack_msg_t; - -typedef struct -{ - lnet_hdr_t ibgm_hdr; /* LNET header */ - __u64 ibgm_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibgm_rd; /* sender's sink buffer */ -} WIRE_ATTR kib_get_msg_t; - -typedef struct -{ - __u64 ibcm_cookie; /* opaque completion cookie */ - __u32 ibcm_status; /* completion status */ -} WIRE_ATTR 
kib_completion_msg_t; - -typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an openibnal message */ - __u16 ibm_version; /* this is my version number */ - - __u8 ibm_type; /* msg type */ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - __u64 ibm_seq; /* sequence number */ - - union { - kib_connparams_t connparams; - kib_immediate_msg_t immediate; - kib_putreq_msg_t putreq; - kib_putack_msg_t putack; - kib_get_msg_t get; - kib_completion_msg_t completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR kib_msg_t; - -#define IBNAL_MSG_MAGIC LNET_PROTO_IIB_MAGIC /* unique magic */ -#define IBNAL_MSG_VERSION 2 /* current protocol version */ -#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 1 /* previous version */ - -#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */ -#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define IBNAL_MSG_IMMEDIATE 0xd1 /* immediate */ -#define IBNAL_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */ -#define IBNAL_MSG_PUT_NAK 0xd3 /* completion (sink->src) */ -#define IBNAL_MSG_PUT_ACK 0xd4 /* putack (sink->src) */ -#define IBNAL_MSG_PUT_DONE 0xd5 /* completion (src->sink) */ -#define IBNAL_MSG_GET_REQ 0xd6 /* getreq (sink->src) */ -#define IBNAL_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */ - -/* connection rejection reasons */ -#define IBNAL_REJECT_CONN_RACE 0 /* You lost connection race */ -#define IBNAL_REJECT_NO_RESOURCES 1 /* Out of memory/conns etc */ -#define IBNAL_REJECT_FATAL 2 /* Anything else */ - -/***********************************************************************/ - -typedef struct kib_rx /* receive message */ -{ - struct list_head rx_list; /* queue for 
attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - __u64 rx_hca_msg; /* pre-mapped buffer (hca vaddr) */ - kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ - IB_WORK_REQ2 rx_wrq; - IB_LOCAL_DATASEGMENT rx_gl; /* and its memory */ -} kib_rx_t; - -typedef struct kib_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */ - struct kib_conn *tx_conn; /* owning conn */ - int tx_mapped; /* mapped for RDMA? */ - int tx_sending; /* # tx callbacks outstanding */ - int tx_queued; /* queued for sending */ - int tx_waiting; /* waiting for peer */ - int tx_status; /* completion status */ - unsigned long tx_deadline; /* completion deadline */ - __u64 tx_cookie; /* completion cookie */ - lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */ - kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */ - __u64 tx_hca_msg; /* pre-mapped buffer (HCA vaddr) */ - int tx_nwrq; /* # send work items */ -#if IBNAL_USE_FMR - IB_WORK_REQ2 tx_wrq[2]; /* send work items... */ - IB_LOCAL_DATASEGMENT tx_gl[2]; /* ...and their memory */ - kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */ - kib_md_t tx_md; /* mapping */ - __u64 *tx_pages; /* page phys addrs */ -#else - IB_WORK_REQ2 *tx_wrq; /* send work items... 
*/ - IB_LOCAL_DATASEGMENT *tx_gl; /* ...and their memory */ - kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */ -#endif -} kib_tx_t; - -typedef struct -{ - /* scratchpad during connection establishment */ - IB_QP_ATTRIBUTES_QUERY cv_qpattrs; - QUERY cv_query; - IB_SERVICE_RECORD cv_svcrec; - IB_PATH_RECORD cv_path; - CM_CONN_INFO cv_cmci; -} kib_connvars_t; - -typedef struct kib_conn -{ - struct kib_peer *ibc_peer; /* owning peer */ - struct list_head ibc_list; /* stash on peer's conn list */ - __u64 ibc_incarnation; /* which instance of the peer */ - __u64 ibc_txseq; /* tx sequence number */ - __u64 ibc_rxseq; /* rx sequence number */ - __u32 ibc_version; /* peer protocol version */ - atomic_t ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits; /* # credits for ACK/DONE msgs */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */ - struct list_head ibc_tx_queue_nocred; /* sends that don't need a cred */ - struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */ - struct list_head ibc_tx_queue; /* send queue */ - struct list_head ibc_active_txs; /* active tx awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - kib_rx_t *ibc_rxs; /* the rx descs */ - kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ - IB_HANDLE ibc_qp; /* queue pair */ - IB_HANDLE ibc_cep; /* CM endpoint */ - kib_connvars_t *ibc_cvars; /* connection scratchpad */ -} kib_conn_t; - -#define IBNAL_CONN_INIT_NOTHING 0 /* initial state */ -#define IBNAL_CONN_INIT_QP 1 /* ibc_qp set up */ -#define IBNAL_CONN_CONNECTING 2 /* started to connect */ -#define IBNAL_CONN_ESTABLISHED 3 /* connection established */ -#define IBNAL_CONN_DISCONNECTING 4 /* to send disconnect req */ -#define 
IBNAL_CONN_DISCONNECTED 5 /* no more QP or CM traffic */ - -/* types of connection */ -#define IBNAL_CONN_ACTIVE 0 /* active connect */ -#define IBNAL_CONN_PASSIVE 1 /* passive connect */ -#define IBNAL_CONN_WAITING 2 /* waiting for connect */ - -typedef struct kib_peer -{ - struct list_head ibp_list; /* stash on global peer list */ - struct list_head ibp_connd_list; /* schedule on kib_connd_peers */ - lnet_nid_t ibp_nid; /* who's on the other end(s) */ - atomic_t ibp_refcount; /* # users */ - int ibp_persistence; /* "known" peer refs */ - int ibp_version; /* protocol version */ - struct list_head ibp_conns; /* all active connections */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - int ibp_connecting; /* active connects in progress */ - int ibp_accepting; /* passive connects in progress */ - int ibp_passivewait; /* waiting for peer to connect */ - unsigned long ibp_passivewait_deadline; /* when passive wait must complete */ - unsigned long ibp_reconnect_time; /* when reconnect may be attempted */ - unsigned long ibp_reconnect_interval; /* exponential backoff */ - int ibp_error; /* errno on closing this peer */ - cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */ -} kib_peer_t; - - -extern kib_data_t kibnal_data; -extern kib_tunables_t kibnal_tunables; - -/******************************************************************************/ - -/* these are purposely avoiding using local vars so they don't increase - * stack consumption. 
*/ - -#define kibnal_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define kibnal_conn_decref(conn) \ -do { \ - unsigned long flags; \ - \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kibnal_data.kib_connd_zombies); \ - wake_up(&kibnal_data.kib_connd_waitq); \ - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); \ - } \ -} while (0) - -#define kibnal_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kibnal_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kibnal_destroy_peer(peer); \ -} while (0) - -/******************************************************************************/ - -static inline struct list_head * -kibnal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - - return (&kibnal_data.kib_peers [hash]); -} - -static inline int -kibnal_peer_active(kib_peer_t *peer) -{ - /* Am I in the peer hash table? */ - return (!list_empty(&peer->ibp_list)); -} - -static inline int -kibnal_peer_connecting(kib_peer_t *peer) -{ - /* Am I expecting a connection to materialise? 
*/ - return (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - peer->ibp_passivewait); -} - -static inline void -kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) -{ - struct list_head *q; - - LASSERT (tx->tx_nwrq > 0); /* work items set up */ - LASSERT (!tx->tx_queued); /* not queued for sending already */ - - tx->tx_queued = 1; - tx->tx_deadline = jiffies + (*kibnal_tunables.kib_timeout * HZ); - - if (tx->tx_conn == NULL) { - kibnal_conn_addref(conn); - tx->tx_conn = conn; - LASSERT (tx->tx_msg->ibm_type != IBNAL_MSG_PUT_DONE); - } else { - LASSERT (tx->tx_conn == conn); - LASSERT (tx->tx_msg->ibm_type == IBNAL_MSG_PUT_DONE); - } - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* All messages have simple credit control */ - q = &conn->ibc_tx_queue; - } else { - LASSERT (conn->ibc_version == IBNAL_MSG_VERSION); - - switch (tx->tx_msg->ibm_type) { - case IBNAL_MSG_PUT_REQ: - case IBNAL_MSG_GET_REQ: - /* RDMA request: reserve a buffer for the RDMA reply - * before sending */ - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBNAL_MSG_PUT_NAK: - case IBNAL_MSG_PUT_ACK: - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - /* RDMA reply/completion: no credits; peer has reserved - * a reply buffer */ - q = &conn->ibc_tx_queue_nocred; - break; - - case IBNAL_MSG_NOOP: - case IBNAL_MSG_IMMEDIATE: - /* Otherwise: consume a credit before sending */ - q = &conn->ibc_tx_queue; - break; - - default: - LBUG(); - q = NULL; - } - } - - list_add_tail(&tx->tx_list, q); -} - -static inline int -kibnal_send_keepalive(kib_conn_t *conn) -{ - return (*kibnal_tunables.kib_keepalive > 0) && - time_after(jiffies, conn->ibc_last_send + - *kibnal_tunables.kib_keepalive*HZ); -} - -#define KIBNAL_SERVICE_KEY_MASK (IB_SERVICE_RECORD_COMP_SERVICENAME | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_1 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_2 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_3 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_4 | \ - 
IB_SERVICE_RECORD_COMP_SERVICEDATA8_5 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_6 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_7 | \ - IB_SERVICE_RECORD_COMP_SERVICEDATA8_8) - -static inline __u64* -kibnal_service_nid_field(IB_SERVICE_RECORD *srv) -{ - /* must be consistent with KIBNAL_SERVICE_KEY_MASK */ - return (__u64 *)srv->ServiceData8; -} - -static inline void -kibnal_set_service_keys(IB_SERVICE_RECORD *srv, lnet_nid_t nid) -{ - char *svc_name = *kibnal_tunables.kib_service_name; - - LASSERT (strlen(svc_name) < sizeof(srv->ServiceName)); - memset (srv->ServiceName, 0, sizeof(srv->ServiceName)); - strcpy (srv->ServiceName, svc_name); - - *kibnal_service_nid_field(srv) = cpu_to_le64(nid); -} - -/* CAVEAT EMPTOR: We rely on tx/rx descriptor alignment to allow us to use the - * lowest 2 bits of the work request id to stash the work item type (the op - * field is not valid when the wc completes in error). */ - -#define IBNAL_WID_TX 0 -#define IBNAL_WID_RX 1 -#define IBNAL_WID_RDMA 2 -#define IBNAL_WID_MASK 3UL - -static inline __u64 -kibnal_ptr2wreqid (void *ptr, int type) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT ((lptr & IBNAL_WID_MASK) == 0); - LASSERT ((type & ~IBNAL_WID_MASK) == 0); - return (__u64)(lptr | type); -} - -static inline void * -kibnal_wreqid2ptr (__u64 wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~IBNAL_WID_MASK); -} - -static inline int -kibnal_wreqid2type (__u64 wreqid) -{ - return (wreqid & IBNAL_WID_MASK); -} - -static inline void -kibnal_set_conn_state (kib_conn_t *conn, int state) -{ - CDEBUG(D_NET,"%p state %d\n", conn, state); - conn->ibc_state = state; - mb(); -} - -#if IBNAL_USE_FMR - -static inline int -kibnal_rd_size (kib_rdma_desc_t *rd) -{ - return rd->rd_nob; -} - -#else -static inline int -kibnal_rd_size (kib_rdma_desc_t *rd) -{ - int i; - int size; - - for (i = size = 0; i < rd->rd_nfrag; i++) - size += rd->rd_frags[i].rf_nob; - - return size; -} -#endif - -int kibnal_startup (lnet_ni_t *ni); -void 
kibnal_shutdown (lnet_ni_t *ni); -int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kibnal_eager_recv (lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob); -void kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, - lnet_nid_t dstnid, __u64 dststamp, __u64 seq); -void kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob, int type, - lnet_nid_t dstnid, __u64 dststamp); -int kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob); -IB_HANDLE kibnal_create_cep(lnet_nid_t nid); -int kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid); -void kibnal_destroy_peer (kib_peer_t *peer); -kib_peer_t *kibnal_find_peer_locked (lnet_nid_t nid); -int kibnal_del_peer (lnet_nid_t nid); -void kibnal_peer_alive (kib_peer_t *peer); -void kibnal_unlink_peer_locked (kib_peer_t *peer); -int kibnal_add_persistent_peer (lnet_nid_t nid); -int kibnal_close_stale_conns_locked (kib_peer_t *peer, - __u64 incarnation); -int kibnal_conn_rts(kib_conn_t *conn, - __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn); -kib_conn_t *kibnal_create_conn (lnet_nid_t nid, int proto_version); -void kibnal_destroy_conn (kib_conn_t *conn); -void kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg); -int kibnal_alloc_pages (kib_pages_t **pp, int npages); -void kibnal_free_pages (kib_pages_t *p); -void kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn); -void kibnal_txlist_done (struct list_head *txlist, int status); -int kibnal_post_receives (kib_conn_t *conn); -int kibnal_init_rdma (kib_tx_t *tx, int type, int nob, - kib_rdma_desc_t *dstrd, __u64 dstcookie); -void kibnal_check_sends (kib_conn_t *conn); 
-void kibnal_close_conn_locked (kib_conn_t *conn, int error); -int kibnal_thread_start (int (*fn)(void *arg), void *arg); -int kibnal_scheduler(void *arg); -int kibnal_connd (void *arg); -void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); -void kibnal_close_conn (kib_conn_t *conn, int why); -void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lnet_msg_t *lntmsg, - unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int nob); -void kibnal_hca_async_callback (void *hca_arg, IB_EVENT_RECORD *ev); -void kibnal_hca_callback (void *hca_arg, void *cq_arg); -int kibnal_tunables_init (void); -void kibnal_tunables_fini (void); diff --git a/lnet/klnds/iiblnd/iiblnd_cb.c b/lnet/klnds/iiblnd/iiblnd_cb.c deleted file mode 100644 index fb4bba027063fa96597bd0a6bb6c49fa659e343c..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/iiblnd_cb.c +++ /dev/null @@ -1,3389 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "iiblnd.h" - -void -hexdump(char *string, void *ptr, int len) -{ - unsigned char *c = ptr; - int i; - - return; - - if (len < 0 || len > 2048) { - printk("XXX what the hell? %d\n",len); - return; - } - - printk("%d bytes of '%s' from 0x%p\n", len, string, ptr); - - for (i = 0; i < len;) { - printk("%02x",*(c++)); - i++; - if (!(i & 15)) { - printk("\n"); - } else if (!(i&1)) { - printk(" "); - } - } - - if(len & 15) { - printk("\n"); - } -} - -void -kibnal_tx_done (kib_tx_t *tx) -{ - lnet_msg_t *lntmsg[2]; - int rc = tx->tx_status; - int i; - - LASSERT (!in_interrupt()); - LASSERT (!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */ - LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */ - -#if IBNAL_USE_FMR - /* Handle unmapping if required */ -#endif - /* tx may have up to 2 lnet msgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - - if (tx->tx_conn != NULL) { - kibnal_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nwrq = 0; - tx->tx_status = 0; - - spin_lock(&kibnal_data.kib_tx_lock); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - spin_unlock(&kibnal_data.kib_tx_lock); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (lntmsg[i] == NULL) - continue; - - lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc); - } -} - -kib_tx_t * -kibnal_get_idle_tx (void) -{ - kib_tx_t *tx; - - spin_lock(&kibnal_data.kib_tx_lock); - - if (list_empty (&kibnal_data.kib_idle_txs)) { - spin_unlock(&kibnal_data.kib_tx_lock); - return NULL; - } - - tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - /* Allocate a new completion cookie. It might not be needed, - * but we've got a lock right now and we're unlikely to - * wrap... 
*/ - tx->tx_cookie = kibnal_data.kib_next_tx_cookie++; - - spin_unlock(&kibnal_data.kib_tx_lock); - - LASSERT (tx->tx_nwrq == 0); - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending == 0); - LASSERT (!tx->tx_waiting); - LASSERT (tx->tx_status == 0); - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); - - return tx; -} - -int -kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit) -{ - kib_conn_t *conn = rx->rx_conn; - int rc = 0; - FSTATUS frc; - - LASSERT (!in_interrupt()); - /* old peers don't reserve rxs for RDMA replies */ - LASSERT (!rsrvd_credit || - conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - - rx->rx_gl = (IB_LOCAL_DATASEGMENT) { - .Address = rx->rx_hca_msg, - .Lkey = kibnal_data.kib_whole_mem.md_lkey, - .Length = IBNAL_MSG_SIZE, - }; - - rx->rx_wrq = (IB_WORK_REQ2) { - .Next = NULL, - .WorkReqId = kibnal_ptr2wreqid(rx, IBNAL_WID_RX), - .MessageLen = IBNAL_MSG_SIZE, - .DSList = &rx->rx_gl, - .DSListDepth = 1, - .Operation = WROpRecv, - }; - - LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING); - LASSERT (rx->rx_nob >= 0); /* not posted */ - - CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n", - rx->rx_wrq.DSList->Length, - rx->rx_wrq.DSList->Lkey, - rx->rx_wrq.DSList->Address); - - if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) { - /* No more posts for this rx; so lose its ref */ - kibnal_conn_decref(conn); - return 0; - } - - rx->rx_nob = -1; /* flag posted */ - mb(); - - frc = iba_post_recv2(conn->ibc_qp, &rx->rx_wrq, NULL); - if (frc == FSUCCESS) { - if (credit || rsrvd_credit) { - spin_lock(&conn->ibc_lock); - - if (credit) - conn->ibc_outstanding_credits++; - if (rsrvd_credit) - conn->ibc_reserved_credits++; - - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); - } - return 0; - } - - CERROR ("post rx -> %s failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - rc = -EIO; - kibnal_close_conn(rx->rx_conn, rc); - /* No more posts for this rx; so lose its ref */ 
- kibnal_conn_decref(conn); - return rc; -} - -int -kibnal_post_receives (kib_conn_t *conn) -{ - int i; - int rc; - - LASSERT (conn->ibc_state == IBNAL_CONN_CONNECTING); - - for (i = 0; i < IBNAL_RX_MSGS; i++) { - /* +1 ref for rx desc. This ref remains until kibnal_post_rx - * fails (i.e. actual failure or we're disconnecting) */ - kibnal_conn_addref(conn); - rc = kibnal_post_rx (&conn->ibc_rxs[i], 0, 0); - if (rc != 0) - return rc; - } - - return 0; -} - -kib_tx_t * -kibnal_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie) -{ - struct list_head *tmp; - - list_for_each(tmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending != 0 || tx->tx_waiting); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_waiting && - tx->tx_msg->ibm_type == txtype) - return tx; - - CWARN("Bad completion: %swaiting, type %x (wanted %x)\n", - tx->tx_waiting ? "" : "NOT ", - tx->tx_msg->ibm_type, txtype); - } - return NULL; -} - -void -kibnal_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) -{ - kib_tx_t *tx; - int idle; - - spin_lock(&conn->ibc_lock); - - tx = kibnal_find_waiting_tx_locked(conn, txtype, cookie); - if (tx == NULL) { - spin_unlock(&conn->ibc_lock); - - CWARN("Unmatched completion type %x cookie "LPX64" from %s\n", - txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_close_conn (conn, -EPROTO); - return; - } - - if (tx->tx_status == 0) { /* success so far */ - if (status < 0) { /* failed? 
*/ - tx->tx_status = status; - } else if (txtype == IBNAL_MSG_GET_REQ) { - lnet_set_reply_msg_len(kibnal_data.kib_ni, - tx->tx_lntmsg[1], status); - } - } - - tx->tx_waiting = 0; - - idle = !tx->tx_queued && (tx->tx_sending == 0); - if (idle) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (idle) - kibnal_tx_done(tx); -} - -void -kibnal_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie) -{ - kib_tx_t *tx = kibnal_get_idle_tx(); - - if (tx == NULL) { - CERROR("Can't get tx for completion %x for %s\n", - type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - tx->tx_msg->ibm_u.completion.ibcm_status = status; - tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie; - kibnal_init_tx_msg(tx, type, sizeof(kib_completion_msg_t)); - - kibnal_queue_tx(tx, conn); -} - -void -kibnal_handle_rx (kib_rx_t *rx) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - int credits = msg->ibm_credits; - kib_tx_t *tx; - int rc = 0; - int repost = 1; - int rsrvd_credit = 0; - int rc2; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - CDEBUG (D_NET, "Received %x[%d] from %s\n", - msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - if (credits != 0) { - /* Have I received credits that will let me send? 
*/ - spin_lock(&conn->ibc_lock); - conn->ibc_credits += credits; - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); - } - - switch (msg->ibm_type) { - default: - CERROR("Bad IBNAL message type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_IMMEDIATE: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_PUT_REQ: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.putreq.ibprm_hdr, - msg->ibm_srcnid, rx, 1); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_PUT_NAK: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_handle_completion(conn, IBNAL_MSG_PUT_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBNAL_MSG_PUT_ACK: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - spin_lock(&conn->ibc_lock); - tx = kibnal_find_waiting_tx_locked(conn, IBNAL_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx != NULL) - list_del(&tx->tx_list); - spin_unlock(&conn->ibc_lock); - - if (tx == NULL) { - CERROR("Unmatched PUT_ACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - } - - LASSERT (tx->tx_waiting); - /* CAVEAT EMPTOR: I could be racing with tx_complete, but... - * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. 
*/ - - tx->tx_nwrq = 0; /* overwrite PUT_REQ */ - - rc2 = kibnal_init_rdma(tx, IBNAL_MSG_PUT_DONE, - kibnal_rd_size(&msg->ibm_u.putack.ibpam_rd), - &msg->ibm_u.putack.ibpam_rd, - msg->ibm_u.putack.ibpam_dst_cookie); - if (rc2 < 0) - CERROR("Can't setup rdma for PUT to %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2); - - spin_lock(&conn->ibc_lock); - if (tx->tx_status == 0 && rc2 < 0) - tx->tx_status = rc2; - tx->tx_waiting = 0; /* clear waiting and queue atomically */ - kibnal_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - break; - - case IBNAL_MSG_PUT_DONE: - /* This buffer was pre-reserved by not returning the credit - * when the PUT_REQ's buffer was reposted, so I just return it - * now */ - kibnal_handle_completion(conn, IBNAL_MSG_PUT_ACK, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBNAL_MSG_GET_REQ: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.get.ibgm_hdr, - msg->ibm_srcnid, rx, 1); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_GET_DONE: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - kibnal_handle_completion(conn, IBNAL_MSG_GET_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - } - - if (rc < 0) /* protocol error */ - kibnal_close_conn(conn, rc); - - if (repost) { - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) - rsrvd_credit = 0; /* peer isn't pre-reserving */ - - kibnal_post_rx(rx, !rsrvd_credit, rsrvd_credit); - } -} - -void -kibnal_rx_complete (IB_WORK_COMPLETION *wc, __u64 rxseq) -{ - kib_rx_t *rx = (kib_rx_t *)kibnal_wreqid2ptr(wc->WorkReqId); - int nob = wc->Length; - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - unsigned long flags; - int rc; - int err = -EIO; - - LASSERT (rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - mb(); - - /* receives complete with error in any case after we've started - * disconnecting */ - if (conn->ibc_state > 
IBNAL_CONN_ESTABLISHED) - goto ignore; - - if (wc->Status != WRStatusSuccess) { - CERROR("Rx from %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), wc->Status); - goto failed; - } - - rc = kibnal_unpack_msg(msg, conn->ibc_version, nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - rx->rx_nob = nob; /* Now I know nob > 0 */ - mb(); - - if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || - msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR ("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - err = -ESTALE; - goto failed; - } - - if (msg->ibm_seq != rxseq) { - CERROR ("Out-of-sequence rx from %s" - ": got "LPD64" but expected "LPD64"\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - msg->ibm_seq, rxseq); - goto failed; - } - - /* set time last known alive */ - kibnal_peer_alive(conn->ibc_peer); - - /* racing with connection establishment/teardown! 
*/ - - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - /* must check holding global lock to eliminate race */ - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return; - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - } - kibnal_handle_rx(rx); - return; - - failed: - kibnal_close_conn(conn, err); - ignore: - /* Don't re-post rx & drop its ref on conn */ - kibnal_conn_decref(conn); -} - -struct page * -kibnal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) { - page = vmalloc_to_page ((void *)vaddr); - LASSERT (page != NULL); - return page; - } -#if CONFIG_HIGHMEM - if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) { - /* No highmem pages only used for bulk (kiov) I/O */ - CERROR("find page for address in highmem\n"); - LBUG(); - } -#endif - page = virt_to_page (vaddr); - LASSERT (page != NULL); - return page; -} - -#if !IBNAL_USE_FMR -int -kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page, - unsigned long page_offset, unsigned long len) -{ - kib_rdma_frag_t *frag = &rd->rd_frags[rd->rd_nfrag]; - - if (rd->rd_nfrag >= IBNAL_MAX_RDMA_FRAGS) { - CERROR ("Too many RDMA fragments\n"); - return -EMSGSIZE; - } - - if (active) { - if (rd->rd_nfrag == 0) - rd->rd_key = kibnal_data.kib_whole_mem.md_lkey; - } else { - if (rd->rd_nfrag == 0) - rd->rd_key = kibnal_data.kib_whole_mem.md_rkey; - } - - frag->rf_nob = len; - frag->rf_addr = kibnal_data.kib_whole_mem.md_addr + - lnet_page2phys(page) + page_offset; - - CDEBUG(D_NET,"map key %x frag [%d]["LPX64" for %d]\n", - rd->rd_key, rd->rd_nfrag, frag->rf_addr, frag->rf_nob); - - rd->rd_nfrag++; - return 0; -} - -int -kibnal_setup_rd_iov(kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - unsigned int niov, struct 
iovec *iov, int offset, int nob) - -{ - int fragnob; - int rc; - unsigned long vaddr; - struct page *page; - int page_offset; - - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - rd->rd_nfrag = 0; - do { - LASSERT (niov > 0); - - vaddr = ((unsigned long)iov->iov_base) + offset; - page_offset = vaddr & (PAGE_SIZE - 1); - page = kibnal_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR ("Can't find page\n"); - return -EFAULT; - } - - fragnob = min((int)(iov->iov_len - offset), nob); - fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); - - rc = kibnal_append_rdfrag(rd, active, page, - page_offset, fragnob); - if (rc != 0) - return rc; - - if (offset + fragnob < iov->iov_len) { - offset += fragnob; - } else { - offset = 0; - iov++; - niov--; - } - nob -= fragnob; - } while (nob > 0); - - return 0; -} - -int -kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - int fragnob; - int rc; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - rd->rd_nfrag = 0; - do { - LASSERT (nkiov > 0); - fragnob = min((int)(kiov->kiov_len - offset), nob); - - rc = kibnal_append_rdfrag(rd, active, kiov->kiov_page, - kiov->kiov_offset + offset, - fragnob); - if (rc != 0) - return rc; - - offset = 0; - kiov++; - nkiov--; - nob -= fragnob; - } while (nob > 0); - - return 0; -} -#else -int -kibnal_map_tx (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - int npages, unsigned long page_offset, int nob) -{ - IB_ACCESS_CONTROL access = {0,}; - FSTATUS frc; - - LASSERT ((rd != tx->tx_rd) == !active); - LASSERT (!tx->tx_md.md_active); - LASSERT 
(tx->tx_md.md_fmrcount > 0); - LASSERT (page_offset < PAGE_SIZE); - LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT))); - LASSERT (npages <= LNET_MAX_IOV); - - if (!active) { - // access.s.MWBindable = 1; - access.s.LocalWrite = 1; - access.s.RdmaWrite = 1; - } - - /* Map the memory described by tx->tx_pages - frc = iibt_register_physical_memory(kibnal_data.kib_hca, - IBNAL_RDMA_BASE, - tx->tx_pages, npages, - page_offset, - kibnal_data.kib_pd, - access, - &tx->tx_md.md_handle, - &tx->tx_md.md_addr, - &tx->tx_md.md_lkey, - &tx->tx_md.md_rkey); - */ - return -EINVAL; -} - -int -kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - int resid; - int fragnob; - struct page *page; - int npages; - unsigned long page_offset; - unsigned long vaddr; - - LASSERT (nob > 0); - LASSERT (niov > 0); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR ("Can't map multiple vaddr fragments\n"); - return (-EMSGSIZE); - } - - vaddr = ((unsigned long)iov->iov_base) + offset; - - page_offset = vaddr & (PAGE_SIZE - 1); - resid = nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - - page = kibnal_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR("Can't find page for %lu\n", vaddr); - return -EFAULT; - } - - tx->tx_pages[npages++] = lnet_page2phys(page); - - fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1)); - vaddr += fragnob; - resid -= fragnob; - - } while (resid > 0); - - return kibnal_map_tx(tx, rd, active, npages, page_offset, nob); -} - -int -kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - int resid; - int npages; - unsigned long page_offset; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (nkiov <= LNET_MAX_IOV); - 
LASSERT (!tx->tx_md.md_active); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - page_offset = kiov->kiov_offset + offset; - - resid = offset + nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - LASSERT (nkiov > 0); - - if ((npages > 0 && kiov->kiov_offset != 0) || - (resid > kiov->kiov_len && - (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) { - /* Can't have gaps */ - CERROR ("Can't make payload contiguous in I/O VM:" - "page %d, offset %d, len %d \n", - npages, kiov->kiov_offset, kiov->kiov_len); - - return -EINVAL; - } - - tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page); - resid -= kiov->kiov_len; - kiov++; - nkiov--; - } while (resid > 0); - - return kibnal_map_tx(tx, rd, active, npages, page_offset, nob); -} -#endif - -kib_conn_t * -kibnal_find_conn_locked (kib_peer_t *peer) -{ - struct list_head *tmp; - - /* just return the first connection */ - list_for_each (tmp, &peer->ibp_conns) { - return (list_entry(tmp, kib_conn_t, ibc_list)); - } - - return (NULL); -} - -void -kibnal_check_sends (kib_conn_t *conn) -{ - kib_tx_t *tx; - FSTATUS frc; - int rc; - int consume_cred; - int done; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - spin_lock(&conn->ibc_lock); - - LASSERT (conn->ibc_nsends_posted <= - *kibnal_tunables.kib_concurrent_sends); - LASSERT (conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_nocred) && - (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER || - kibnal_send_keepalive(conn))) { - 
spin_unlock(&conn->ibc_lock); - - tx = kibnal_get_idle_tx(); - if (tx != NULL) - kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0); - - spin_lock(&conn->ibc_lock); - - if (tx != NULL) - kibnal_queue_tx_locked(tx, conn); - } - - for (;;) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry (conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); - consume_cred = 0; - } else if (!list_empty (&conn->ibc_tx_queue)) { - tx = list_entry (conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - consume_cred = 1; - } else { - /* nothing waiting */ - break; - } - - LASSERT (tx->tx_queued); - /* We rely on this for QP sizing */ - LASSERT (tx->tx_nwrq > 0 && tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS); - - LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE); - LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE); - - if (conn->ibc_nsends_posted == - *kibnal_tunables.kib_concurrent_sends) { - /* We've got some tx completions outstanding... 
*/ - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (consume_cred) { - if (conn->ibc_credits == 0) { /* no credits */ - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) { /* giving back credits */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - } - - list_del (&tx->tx_list); - tx->tx_queued = 0; - - /* NB don't drop ibc_lock before bumping tx_sending */ - - if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP && - (!list_empty(&conn->ibc_tx_queue) || - !list_empty(&conn->ibc_tx_queue_nocred) || - (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER && - !kibnal_send_keepalive(conn)))) { - /* redundant NOOP */ - spin_unlock(&conn->ibc_lock); - kibnal_tx_done(tx); - spin_lock(&conn->ibc_lock); - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - continue; - } - - kibnal_pack_msg(tx->tx_msg, conn->ibc_version, - conn->ibc_outstanding_credits, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation, - conn->ibc_txseq); - - conn->ibc_txseq++; - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (consume_cred) - conn->ibc_credits--; - - /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA - * PUT. If so, it was first queued here as a PUT_REQ, sent and - * stashed on ibc_active_txs, matched by an incoming PUT_ACK, - * and then re-queued here. It's (just) possible that - * tx_sending is non-zero if we've not done the tx_complete() from - * the first send; hence the ++ rather than = below. 
*/ - tx->tx_sending++; - - list_add (&tx->tx_list, &conn->ibc_active_txs); - - LASSERT (tx->tx_nwrq > 0); - - rc = 0; - frc = FSUCCESS; - if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) { - rc = -ECONNABORTED; - } else { - frc = iba_post_send2(conn->ibc_qp, tx->tx_wrq, NULL); - if (frc != FSUCCESS) - rc = -EIO; - } - - conn->ibc_last_send = jiffies; - - if (rc != 0) { - /* NB credits are transferred in the actual - * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; - if (consume_cred) - conn->ibc_credits++; - conn->ibc_nsends_posted--; - - tx->tx_status = rc; - tx->tx_waiting = 0; - tx->tx_sending--; - - done = (tx->tx_sending == 0); - if (done) - list_del (&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) - CERROR ("Error %d posting transmit to %s\n", - frc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - else - CDEBUG (D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_close_conn (conn, rc); - - if (done) - kibnal_tx_done (tx); - return; - } - } - - spin_unlock(&conn->ibc_lock); -} - -void -kibnal_tx_complete (IB_WORK_COMPLETION *wc) -{ - kib_tx_t *tx = (kib_tx_t *)kibnal_wreqid2ptr(wc->WorkReqId); - kib_conn_t *conn = tx->tx_conn; - int failed = wc->Status != WRStatusSuccess; - int idle; - - CDEBUG(D_NET, "%s: sending %d nwrq %d status %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_sending, tx->tx_nwrq, wc->Status); - - LASSERT (tx->tx_sending > 0); - - if (failed && - tx->tx_status == 0 && - conn->ibc_state == IBNAL_CONN_ESTABLISHED) { -#if KIBLND_DETAILED_DEBUG - int i; - IB_WORK_REQ2 *wrq = &tx->tx_wrq[0]; - IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[0]; - lnet_msg_t *lntmsg = tx->tx_lntmsg[0]; -#endif - CDEBUG(D_NETERROR, "tx -> %s type %x cookie "LPX64 - " sending %d waiting %d failed %d nwrk %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_msg->ibm_type, tx->tx_cookie, - 
tx->tx_sending, tx->tx_waiting, wc->Status, - tx->tx_nwrq); -#if KIBLND_DETAILED_DEBUG - for (i = 0; i < tx->tx_nwrq; i++, wrq++, gl++) { - switch (wrq->Operation) { - default: - CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p OP %d " - "DSList %p(%p)/%d: "LPX64"/%d K %x\n", - i, wrq, wrq->Next, wrq->Operation, - wrq->DSList, gl, wrq->DSListDepth, - gl->Address, gl->Length, gl->Lkey); - break; - case WROpSend: - CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p SEND " - "DSList %p(%p)/%d: "LPX64"/%d K %x\n", - i, wrq, wrq->Next, - wrq->DSList, gl, wrq->DSListDepth, - gl->Address, gl->Length, gl->Lkey); - break; - case WROpRdmaWrite: - CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p DMA " - "DSList: %p(%p)/%d "LPX64"/%d K %x -> " - LPX64" K %x\n", - i, wrq, wrq->Next, - wrq->DSList, gl, wrq->DSListDepth, - gl->Address, gl->Length, gl->Lkey, - wrq->Req.SendRC.RemoteDS.Address, - wrq->Req.SendRC.RemoteDS.Rkey); - break; - } - } - - switch (tx->tx_msg->ibm_type) { - default: - CDEBUG(D_NETERROR, " msg type %x %p/%d, No RDMA\n", - tx->tx_msg->ibm_type, - tx->tx_msg, tx->tx_msg->ibm_nob); - break; - - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - CDEBUG(D_NETERROR, " msg type %x %p/%d, RDMA key %x frags %d...\n", - tx->tx_msg->ibm_type, - tx->tx_msg, tx->tx_msg->ibm_nob, - tx->tx_rd->rd_key, tx->tx_rd->rd_nfrag); - for (i = 0; i < tx->tx_rd->rd_nfrag; i++) - CDEBUG(D_NETERROR, " [%d] "LPX64"/%d\n", i, - tx->tx_rd->rd_frags[i].rf_addr, - tx->tx_rd->rd_frags[i].rf_nob); - if (lntmsg == NULL) { - CDEBUG(D_NETERROR, " No lntmsg\n"); - } else if (lntmsg->msg_iov != NULL) { - CDEBUG(D_NETERROR, " lntmsg in %d VIRT frags...\n", - lntmsg->msg_niov); - for (i = 0; i < lntmsg->msg_niov; i++) - CDEBUG(D_NETERROR, " [%d] %p/%d\n", i, - lntmsg->msg_iov[i].iov_base, - lntmsg->msg_iov[i].iov_len); - } else if (lntmsg->msg_kiov != NULL) { - CDEBUG(D_NETERROR, " lntmsg in %d PAGE frags...\n", - lntmsg->msg_niov); - for (i = 0; i < lntmsg->msg_niov; i++) - CDEBUG(D_NETERROR, " [%d] %p+%d/%d\n", i, 
- lntmsg->msg_kiov[i].kiov_page, - lntmsg->msg_kiov[i].kiov_offset, - lntmsg->msg_kiov[i].kiov_len); - } else { - CDEBUG(D_NETERROR, " lntmsg in %d frags\n", - lntmsg->msg_niov); - } - - break; - } -#endif - } - - spin_lock(&conn->ibc_lock); - - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. */ - - tx->tx_sending--; - conn->ibc_nsends_posted--; - - if (failed) { - tx->tx_waiting = 0; - tx->tx_status = -EIO; - } - - idle = (tx->tx_sending == 0) && /* This is the final callback */ - !tx->tx_waiting && /* Not waiting for peer */ - !tx->tx_queued; /* Not re-queued (PUT_DONE) */ - if (idle) - list_del(&tx->tx_list); - - kibnal_conn_addref(conn); /* 1 ref for me.... */ - - spin_unlock(&conn->ibc_lock); - - if (idle) - kibnal_tx_done (tx); - - if (failed) { - kibnal_close_conn (conn, -EIO); - } else { - kibnal_peer_alive(conn->ibc_peer); - kibnal_check_sends(conn); - } - - kibnal_conn_decref(conn); /* ...until here */ -} - -void -kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) -{ - IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[tx->tx_nwrq]; - IB_WORK_REQ2 *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - - LASSERT (tx->tx_nwrq >= 0 && - tx->tx_nwrq < (1 + IBNAL_MAX_RDMA_FRAGS)); - LASSERT (nob <= IBNAL_MSG_SIZE); - - kibnal_init_msg(tx->tx_msg, type, body_nob); - - *gl = (IB_LOCAL_DATASEGMENT) { - .Address = tx->tx_hca_msg, - .Length = IBNAL_MSG_SIZE, - .Lkey = kibnal_data.kib_whole_mem.md_lkey, - }; - - wrq->Next = NULL; /* This is the last one */ - - wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_TX); - wrq->Operation = WROpSend; - wrq->DSList = gl; - wrq->DSListDepth = 1; - wrq->MessageLen = nob; - wrq->Req.SendRC.ImmediateData = 0; - wrq->Req.SendRC.Options.s.SolicitedEvent = 1; - wrq->Req.SendRC.Options.s.SignaledCompletion = 1; - wrq->Req.SendRC.Options.s.ImmediateData = 0; - wrq->Req.SendRC.Options.s.Fence = 0; - /* fence only needed on RDMA reads 
*/ - - tx->tx_nwrq++; -} - -int -kibnal_init_rdma (kib_tx_t *tx, int type, int nob, - kib_rdma_desc_t *dstrd, __u64 dstcookie) -{ - kib_msg_t *ibmsg = tx->tx_msg; - kib_rdma_desc_t *srcrd = tx->tx_rd; - IB_LOCAL_DATASEGMENT *gl; - IB_WORK_REQ2 *wrq; - int rc; - -#if IBNAL_USE_FMR - LASSERT (tx->tx_nwrq == 0); - - gl = &tx->tx_gl[0]; - gl->Length = nob; - gl->Address = srcrd->rd_addr; - gl->Lkey = srcrd->rd_key; - - wrq = &tx->tx_wrq[0]; - - wrq->Next = wrq + 1; - wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); - wrq->Operation = WROpRdmaWrite; - wrq->DSList = gl; - wrq->DSListDepth = 1; - wrq->MessageLen = nob; - - wrq->Req.SendRC.ImmediateData = 0; - wrq->Req.SendRC.Options.s.SolicitedEvent = 0; - wrq->Req.SendRC.Options.s.SignaledCompletion = 0; - wrq->Req.SendRC.Options.s.ImmediateData = 0; - wrq->Req.SendRC.Options.s.Fence = 0; - - wrq->Req.SendRC.RemoteDS.Address = dstrd->rd_addr; - wrq->Req.SendRC.RemoteDS.Rkey = dstrd->rd_key; - - tx->tx_nwrq = 1; - rc = nob; -#else - /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */ - int resid = nob; - kib_rdma_frag_t *srcfrag; - int srcidx; - kib_rdma_frag_t *dstfrag; - int dstidx; - int wrknob; - - /* Called by scheduler */ - LASSERT (!in_interrupt()); - - LASSERT (type == IBNAL_MSG_GET_DONE || - type == IBNAL_MSG_PUT_DONE); - - srcidx = dstidx = 0; - srcfrag = &srcrd->rd_frags[0]; - dstfrag = &dstrd->rd_frags[0]; - rc = resid; - - while (resid > 0) { - if (srcidx >= srcrd->rd_nfrag) { - CERROR("Src buffer exhausted: %d frags\n", srcidx); - rc = -EPROTO; - break; - } - - if (dstidx == dstrd->rd_nfrag) { - CERROR("Dst buffer exhausted: %d frags\n", dstidx); - rc = -EPROTO; - break; - } - - if (tx->tx_nwrq == IBNAL_MAX_RDMA_FRAGS) { - CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n", - srcidx, srcrd->rd_nfrag, - dstidx, dstrd->rd_nfrag); - rc = -EMSGSIZE; - break; - } - - wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid); - - gl = &tx->tx_gl[tx->tx_nwrq]; - gl->Length = wrknob; - 
gl->Address = srcfrag->rf_addr; - gl->Lkey = srcrd->rd_key; - - wrq = &tx->tx_wrq[tx->tx_nwrq]; - - wrq->Next = wrq + 1; - wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); - wrq->Operation = WROpRdmaWrite; - wrq->DSList = gl; - wrq->DSListDepth = 1; - wrq->MessageLen = nob; - - wrq->Req.SendRC.ImmediateData = 0; - wrq->Req.SendRC.Options.s.SolicitedEvent = 0; - wrq->Req.SendRC.Options.s.SignaledCompletion = 0; - wrq->Req.SendRC.Options.s.ImmediateData = 0; - wrq->Req.SendRC.Options.s.Fence = 0; - - wrq->Req.SendRC.RemoteDS.Address = dstfrag->rf_addr; - wrq->Req.SendRC.RemoteDS.Rkey = dstrd->rd_key; - - resid -= wrknob; - if (wrknob < srcfrag->rf_nob) { - srcfrag->rf_addr += wrknob; - srcfrag->rf_nob -= wrknob; - } else { - srcfrag++; - srcidx++; - } - - if (wrknob < dstfrag->rf_nob) { - dstfrag->rf_addr += wrknob; - dstfrag->rf_nob -= wrknob; - } else { - dstfrag++; - dstidx++; - } - - tx->tx_nwrq++; - } - - if (rc < 0) /* no RDMA if completing with failure */ - tx->tx_nwrq = 0; -#endif - - ibmsg->ibm_u.completion.ibcm_status = rc; - ibmsg->ibm_u.completion.ibcm_cookie = dstcookie; - kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t)); - - return rc; -} - -void -kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn) -{ - spin_lock(&conn->ibc_lock); - kibnal_queue_tx_locked (tx, conn); - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); -} - -void -kibnal_schedule_active_connect_locked (kib_peer_t *peer, int proto_version) -{ - /* Called holding kib_global_lock exclusive with IRQs disabled */ - - peer->ibp_version = proto_version; /* proto version for new conn */ - peer->ibp_connecting++; /* I'm connecting */ - kibnal_peer_addref(peer); /* extra ref for connd */ - - spin_lock(&kibnal_data.kib_connd_lock); - - list_add_tail (&peer->ibp_connd_list, &kibnal_data.kib_connd_peers); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock(&kibnal_data.kib_connd_lock); -} - -void -kibnal_schedule_active_connect (kib_peer_t *peer, int proto_version) -{ - 
unsigned long flags; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - kibnal_schedule_active_connect_locked(peer, proto_version); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid) -{ - kib_peer_t *peer; - kib_conn_t *conn; - unsigned long flags; - rwlock_t *g_lock = &kibnal_data.kib_global_lock; - int retry; - int rc; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx->tx_nwrq > 0); /* work items have been set up */ - - for (retry = 0; ; retry = 1) { - read_lock_irqsave(g_lock, flags); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) { - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - kibnal_conn_addref(conn); /* 1 ref for me... */ - read_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...to here */ - return; - } - } - - /* Making one or more connections; I'll need a write lock... */ - read_unlock(g_lock); - write_lock(g_lock); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) - break; - - write_unlock_irqrestore(g_lock, flags); - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); - - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - - rc = kibnal_add_persistent_peer(nid); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); - - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - } - - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - kibnal_conn_addref(conn); /* 1 ref for me... 
*/ - write_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...until here */ - return; - } - - if (!kibnal_peer_connecting(peer)) { - if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */ - time_after_eq(jiffies, peer->ibp_reconnect_time))) { - write_unlock_irqrestore(g_lock, flags); - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - - kibnal_schedule_active_connect_locked(peer, IBNAL_MSG_VERSION); - } - - /* A connection is being established; queue the message... */ - list_add_tail (&tx->tx_list, &peer->ibp_tx_queue); - - write_unlock_irqrestore(g_lock, flags); -} - -void -kibnal_txlist_done (struct list_head *txlist, int status) -{ - kib_tx_t *tx; - - while (!list_empty (txlist)) { - tx = list_entry (txlist->next, kib_tx_t, tx_list); - - list_del (&tx->tx_list); - /* complete now */ - tx->tx_waiting = 0; - tx->tx_status = status; - kibnal_tx_done (tx); - } -} - -int -kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kib_msg_t *ibmsg; - kib_tx_t *tx; - int nob; - int rc; - - /* NB 'private' is different depending on what we're sending.... 
*/ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* Thread context */ - LASSERT (!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - switch (type) { - default: - LBUG(); - return (-EIO); - - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can allocate txd for GET to %s: \n", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.get.ibgm_hdr = *hdr; - ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - rc = kibnal_setup_rd_iov(tx, &ibmsg->ibm_u.get.ibgm_rd, - 0, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kibnal_setup_rd_kiov(tx, &ibmsg->ibm_u.get.ibgm_rd, - 0, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc != 0) { - CERROR("Can't setup GET sink for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kibnal_tx_done(tx); - return -EIO; - } - -#if IBNAL_USE_FMR - nob = sizeof(kib_get_msg_t); -#else - { - int n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag; - - nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]); - } -#endif - kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob); - - tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni, - lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR("Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - kibnal_tx_done(tx); - return -EIO; - 
} - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */ - tx->tx_waiting = 1; /* waiting for GET_DONE */ - kibnal_launch_tx(tx, target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't allocate %s txd for %s\n", - type == LNET_MSG_PUT ? "PUT" : "REPLY", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - if (payload_kiov == NULL) - rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 1, - payload_niov, payload_iov, - payload_offset, payload_nob); - else - rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 1, - payload_niov, payload_kiov, - payload_offset, payload_nob); - if (rc != 0) { - CERROR("Can't setup PUT src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kibnal_tx_done(tx); - return -EIO; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.putreq.ibprm_hdr = *hdr; - ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; - kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_REQ, sizeof(kib_putreq_msg_t)); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ - kibnal_launch_tx(tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]) - <= IBNAL_MSG_SIZE); - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR ("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - 
payload_niov, payload_iov, - payload_offset, payload_nob); - - nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]); - kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - kibnal_launch_tx(tx, target.nid); - return 0; -} - -void -kibnal_reply(lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) -{ - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - kib_tx_t *tx; - int rc; - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't get tx for REPLY to %s\n", - libcfs_nid2str(target.nid)); - goto failed_0; - } - - if (nob == 0) - rc = 0; - else if (kiov == NULL) - rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 1, - niov, iov, offset, nob); - else - rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 1, - niov, kiov, offset, nob); - - if (rc != 0) { - CERROR("Can't setup GET src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE, nob, - &rx->rx_msg->ibm_u.get.ibgm_rd, - rx->rx_msg->ibm_u.get.ibgm_cookie); - if (rc < 0) { - CERROR("Can't setup rdma for GET from %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - if (rc == 0) { - /* No RDMA: local completion may happen now! 
*/ - lnet_finalize(ni, lntmsg, 0); - } else { - /* RDMA: lnet_finalize(lntmsg) when it - * completes */ - tx->tx_lntmsg[0] = lntmsg; - } - - kibnal_queue_tx(tx, rx->rx_conn); - return; - - failed_1: - kibnal_tx_done(tx); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - void **new_private) -{ - kib_rx_t *rx = private; - kib_conn_t *conn = rx->rx_conn; - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* Can't block if RDMA completions need normal credits */ - LCONSOLE_ERROR("Dropping message from %s: no buffers free. " - "%s is running an old version of LNET that may " - "deadlock if messages wait for buffers)\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return -EDEADLK; - } - - *new_private = private; - return 0; -} - -int -kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kib_rx_t *rx = private; - kib_msg_t *rxmsg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - kib_tx_t *tx; - kib_msg_t *txmsg; - int nob; - int post_cred = 1; - int rc = 0; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBNAL_MSG_IMMEDIATE: - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov(niov, kiov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, 
ibm_u.immediate.ibim_payload), - mlen); - lnet_finalize (ni, lntmsg, 0); - break; - - case IBNAL_MSG_PUT_REQ: - if (mlen == 0) { - lnet_finalize(ni, lntmsg, 0); - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't allocate tx for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* Not replying will break the connection */ - rc = -ENOMEM; - break; - } - - txmsg = tx->tx_msg; - if (kiov == NULL) - rc = kibnal_setup_rd_iov(tx, - &txmsg->ibm_u.putack.ibpam_rd, - 0, - niov, iov, offset, mlen); - else - rc = kibnal_setup_rd_kiov(tx, - &txmsg->ibm_u.putack.ibpam_rd, - 0, - niov, kiov, offset, mlen); - if (rc != 0) { - CERROR("Can't setup PUT sink for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kibnal_tx_done(tx); - /* tell peer it's over */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie; - txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie; -#if IBNAL_USE_FMR - nob = sizeof(kib_putack_msg_t); -#else - { - int n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag; - - nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]); - } -#endif - kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_DONE */ - kibnal_queue_tx(tx, conn); - - if (conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) - post_cred = 0; /* peer still owns 'rx' for sending PUT_DONE */ - break; - - case IBNAL_MSG_GET_REQ: - if (lntmsg != NULL) { - /* Optimized GET; RDMA lntmsg's payload */ - kibnal_reply(ni, rx, lntmsg); - } else { - /* GET didn't match anything */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE, - -ENODATA, - rxmsg->ibm_u.get.ibgm_cookie); - } - break; - } - - kibnal_post_rx(rx, post_cred, 0); - return 
rc; -} - -int -kibnal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kibnal_data.kib_nthreads); - return (0); -} - -void -kibnal_thread_fini (void) -{ - atomic_dec (&kibnal_data.kib_nthreads); -} - -void -kibnal_peer_alive (kib_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -void -kibnal_peer_notify (kib_peer_t *peer) -{ - time_t last_alive = 0; - int error = 0; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { - error = peer->ibp_error; - peer->ibp_error = 0; - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive); - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (error != 0) - lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive); -} - -void -kibnal_schedule_conn (kib_conn_t *conn) -{ - unsigned long flags; - - kibnal_conn_addref(conn); /* ++ref for connd */ - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); -} - -void -kibnal_close_conn_locked (kib_conn_t *conn, int error) -{ - /* This just does the immediate housekeeping to start shutdown of an - * established connection. 'error' is zero for a normal shutdown. 
- * Caller holds kib_global_lock exclusively in irq context */ - kib_peer_t *peer = conn->ibc_peer; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) - return; /* already being handled */ - - /* NB Can't take ibc_lock here (could be in IRQ context), without - * risking deadlock, so access to ibc_{tx_queue,active_txs} is racey */ - - if (error == 0 && - list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_rsrvd) && - list_empty(&conn->ibc_tx_queue_nocred) && - list_empty(&conn->ibc_active_txs)) { - CDEBUG(D_NET, "closing conn to %s" - " rx# "LPD64" tx# "LPD64"\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_txseq, conn->ibc_rxseq); - } else { - CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s" - " rx# "LPD64" tx# "LPD64"\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? 
"" : "(waiting)", - conn->ibc_txseq, conn->ibc_rxseq); -#if 0 - /* can't skip down the queue without holding ibc_lock (see above) */ - list_for_each(tmp, &conn->ibc_tx_queue) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - CERROR(" queued tx type %x cookie "LPX64 - " sending %d waiting %d ticks %ld/%d\n", - tx->tx_msg->ibm_type, tx->tx_cookie, - tx->tx_sending, tx->tx_waiting, - (long)(tx->tx_deadline - jiffies), HZ); - } - - list_for_each(tmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - CERROR(" active tx type %x cookie "LPX64 - " sending %d waiting %d ticks %ld/%d\n", - tx->tx_msg->ibm_type, tx->tx_cookie, - tx->tx_sending, tx->tx_waiting, - (long)(tx->tx_deadline - jiffies), HZ); - } -#endif - } - - list_del (&conn->ibc_list); - - if (list_empty (&peer->ibp_conns)) { /* no more conns */ - if (peer->ibp_persistence == 0 && /* non-persistent peer */ - kibnal_peer_active(peer)) /* still in peer table */ - kibnal_unlink_peer_locked (peer); - - peer->ibp_error = error; /* set/clear error on last conn */ - } - - kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTING); - - kibnal_schedule_conn(conn); - kibnal_conn_decref(conn); /* lose ibc_list's ref */ -} - -void -kibnal_close_conn (kib_conn_t *conn, int error) -{ - unsigned long flags; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - kibnal_close_conn_locked (conn, error); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_handle_early_rxs(kib_conn_t *conn) -{ - unsigned long flags; - kib_rx_t *rx; - - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - while (!list_empty(&conn->ibc_early_rxs)) { - rx = list_entry(conn->ibc_early_rxs.next, - kib_rx_t, rx_list); - list_del(&rx->rx_list); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_handle_rx(rx); - - write_lock_irqsave(&kibnal_data.kib_global_lock, 
flags); - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_abort_txs(kib_conn_t *conn, struct list_head *txs) -{ - LIST_HEAD (zombies); - struct list_head *tmp; - struct list_head *nxt; - kib_tx_t *tx; - - spin_lock(&conn->ibc_lock); - - list_for_each_safe (tmp, nxt, txs) { - tx = list_entry (tmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_queued = 0; - tx->tx_waiting = 0; - - if (tx->tx_sending == 0) { - list_del (&tx->tx_list); - list_add (&tx->tx_list, &zombies); - } - } - - spin_unlock(&conn->ibc_lock); - - kibnal_txlist_done(&zombies, -ECONNABORTED); -} - -void -kibnal_conn_disconnected(kib_conn_t *conn) -{ - static IB_QP_ATTRIBUTES_MODIFY qpam = {.RequestState = QPStateError}; - - FSTATUS frc; - - LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP); - - kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTED); - - /* move QP to error state to make posted work items complete */ - frc = iba_modify_qp(conn->ibc_qp, &qpam, NULL); - if (frc != FSUCCESS) - CERROR("can't move qp state to error: %d\n", frc); - - /* Complete all tx descs not waiting for sends to complete. 
- * NB we should be safe from RDMA now that the QP has changed state */ - - kibnal_abort_txs(conn, &conn->ibc_tx_queue); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kibnal_abort_txs(conn, &conn->ibc_tx_queue); - kibnal_abort_txs(conn, &conn->ibc_active_txs); - - kibnal_handle_early_rxs(conn); -} - -void -kibnal_peer_connect_failed (kib_peer_t *peer, int type, int error) -{ - LIST_HEAD (zombies); - unsigned long flags; - - LASSERT (error != 0); - LASSERT (!in_interrupt()); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - LASSERT (kibnal_peer_connecting(peer)); - - switch (type) { - case IBNAL_CONN_ACTIVE: - LASSERT (peer->ibp_connecting > 0); - peer->ibp_connecting--; - break; - - case IBNAL_CONN_PASSIVE: - LASSERT (peer->ibp_accepting > 0); - peer->ibp_accepting--; - break; - - case IBNAL_CONN_WAITING: - /* Can't assert; I might be racing with a successful connection - * which clears passivewait */ - peer->ibp_passivewait = 0; - break; - default: - LBUG(); - } - - if (kibnal_peer_connecting(peer) || /* another attempt underway */ - !list_empty(&peer->ibp_conns)) { /* got connected */ - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return; - } - - /* Say when active connection can be re-attempted */ - peer->ibp_reconnect_interval *= 2; - peer->ibp_reconnect_interval = - MAX(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_min_reconnect_interval); - peer->ibp_reconnect_interval = - MIN(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_max_reconnect_interval); - - peer->ibp_reconnect_time = jiffies + peer->ibp_reconnect_interval * HZ; - - /* Take peer's blocked transmits to complete with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kibnal_peer_active(peer) && - peer->ibp_persistence == 0) { - /* failed connection attempt on non-persistent peer */ - kibnal_unlink_peer_locked (peer); - } - - peer->ibp_error = error; - - 
write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_peer_notify(peer); - - if (list_empty (&zombies)) - return; - - CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_txlist_done (&zombies, -EHOSTUNREACH); -} - -void -kibnal_connreq_done (kib_conn_t *conn, int type, int status) -{ - kib_peer_t *peer = conn->ibc_peer; - struct list_head txs; - kib_tx_t *tx; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (type == IBNAL_CONN_ACTIVE || type == IBNAL_CONN_PASSIVE); - LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP); - LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED); - LASSERT (kibnal_peer_connecting(peer)); - - LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars)); - conn->ibc_cvars = NULL; - - if (status != 0) { - /* failed to establish connection */ - kibnal_peer_connect_failed(conn->ibc_peer, type, status); - kibnal_conn_disconnected(conn); - kibnal_conn_decref(conn); /* Lose CM's ref */ - return; - } - - /* connection established */ - LASSERT(conn->ibc_state == IBNAL_CONN_CONNECTING); - - conn->ibc_last_send = jiffies; - kibnal_set_conn_state(conn, IBNAL_CONN_ESTABLISHED); - kibnal_peer_alive(peer); - - CDEBUG(D_NET, "Connection %s ESTABLISHED\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - peer->ibp_passivewait = 0; /* not waiting (got conn now) */ - kibnal_conn_addref(conn); /* +1 ref for ibc_list */ - list_add_tail(&conn->ibc_list, &peer->ibp_conns); - - if (!kibnal_peer_active(peer)) { - /* peer has been deleted */ - kibnal_close_conn_locked(conn, -ECONNABORTED); - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_peer_connect_failed(conn->ibc_peer, type, -ECONNABORTED); - kibnal_conn_decref(conn); /* lose CM's ref */ - return; - } - - switch (type) { - case IBNAL_CONN_ACTIVE: - LASSERT (peer->ibp_connecting > 0); - peer->ibp_connecting--; - break; - - case 
IBNAL_CONN_PASSIVE: - LASSERT (peer->ibp_accepting > 0); - peer->ibp_accepting--; - break; - default: - LBUG(); - } - - peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */ - - /* Nuke any dangling conns from a different peer instance... */ - kibnal_close_stale_conns_locked(peer, conn->ibc_incarnation); - - /* grab txs blocking for a conn */ - list_add(&txs, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - /* Schedule blocked txs */ - spin_lock (&conn->ibc_lock); - while (!list_empty (&txs)) { - tx = list_entry (txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - kibnal_queue_tx_locked (tx, conn); - } - spin_unlock (&conn->ibc_lock); - kibnal_check_sends (conn); -} - -void -kibnal_reject (lnet_nid_t nid, IB_HANDLE cep, int why) -{ - static CM_REJECT_INFO msgs[3]; - CM_REJECT_INFO *msg = &msgs[why]; - FSTATUS frc; - - LASSERT (why >= 0 && why < sizeof(msgs)/sizeof(msgs[0])); - - /* If I wasn't so lazy, I'd initialise this only once; it's effectively - * read-only... */ - msg->Reason = RC_USER_REJ; - msg->PrivateData[0] = (IBNAL_MSG_MAGIC) & 0xff; - msg->PrivateData[1] = (IBNAL_MSG_MAGIC >> 8) & 0xff; - msg->PrivateData[2] = (IBNAL_MSG_MAGIC >> 16) & 0xff; - msg->PrivateData[3] = (IBNAL_MSG_MAGIC >> 24) & 0xff; - msg->PrivateData[4] = (IBNAL_MSG_VERSION) & 0xff; - msg->PrivateData[5] = (IBNAL_MSG_VERSION >> 8) & 0xff; - msg->PrivateData[6] = why; - - frc = iba_cm_reject(cep, msg); - if (frc != FSUCCESS) - CERROR("Error %d rejecting %s\n", frc, libcfs_nid2str(nid)); -} - -void -kibnal_check_connreject(kib_conn_t *conn, int type, CM_REJECT_INFO *rej) -{ - kib_peer_t *peer = conn->ibc_peer; - unsigned long flags; - int magic; - int version; - int why; - - LASSERT (type == IBNAL_CONN_ACTIVE || - type == IBNAL_CONN_PASSIVE); - - CDEBUG(D_NET, "%s connection with %s rejected: %d\n", - (type == IBNAL_CONN_ACTIVE) ? 
"Active" : "Passive", - libcfs_nid2str(peer->ibp_nid), rej->Reason); - - switch (rej->Reason) { - case RC_STALE_CONN: - if (type == IBNAL_CONN_PASSIVE) { - CERROR("Connection to %s rejected (stale QP)\n", - libcfs_nid2str(peer->ibp_nid)); - } else { - CWARN("Connection from %s rejected (stale QP): " - "retrying...\n", libcfs_nid2str(peer->ibp_nid)); - - /* retry from scratch to allocate a new conn - * which will use a different QP */ - kibnal_schedule_active_connect(peer, peer->ibp_version); - } - - /* An FCM_DISCONNECTED callback is still outstanding: give it a - * ref since kibnal_connreq_done() drops the CM's ref on conn - * on failure */ - kibnal_conn_addref(conn); - break; - - case RC_USER_REJ: - magic = (rej->PrivateData[0]) | - (rej->PrivateData[1] << 8) | - (rej->PrivateData[2] << 16) | - (rej->PrivateData[3] << 24); - version = (rej->PrivateData[4]) | - (rej->PrivateData[5] << 8); - why = (rej->PrivateData[6]); - - /* retry with old proto version */ - if (magic == IBNAL_MSG_MAGIC && - version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD && - conn->ibc_version == IBNAL_MSG_VERSION && - type != IBNAL_CONN_PASSIVE) { - /* retry with a new conn */ - CWARN ("Connection to %s refused: " - "retrying with old protocol version 0x%x\n", - libcfs_nid2str(peer->ibp_nid), version); - kibnal_schedule_active_connect(peer, version); - break; - } - - if (magic != IBNAL_MSG_MAGIC || - version != IBNAL_MSG_VERSION) { - CERROR("%s connection with %s rejected " - "(magic/ver %08x/%d why %d): " - "incompatible protocol\n", - (type == IBNAL_CONN_ACTIVE) ? 
- "Active" : "Passive", - libcfs_nid2str(peer->ibp_nid), - magic, version, why); - break; - } - - if (type == IBNAL_CONN_ACTIVE && - why == IBNAL_REJECT_CONN_RACE) { - /* lost connection race */ - CWARN("Connection to %s rejected: " - "lost connection race\n", - libcfs_nid2str(peer->ibp_nid)); - - write_lock_irqsave(&kibnal_data.kib_global_lock, - flags); - - if (list_empty(&peer->ibp_conns)) { - peer->ibp_passivewait = 1; - peer->ibp_passivewait_deadline = - jiffies + - (*kibnal_tunables.kib_timeout * HZ); - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - break; - } - - CERROR("%s connection with %s rejected: %d\n", - (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive", - libcfs_nid2str(peer->ibp_nid), why); - break; - - default: - CERROR("%s connection with %s rejected: %d\n", - (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive", - libcfs_nid2str(peer->ibp_nid), rej->Reason); - } - - kibnal_connreq_done(conn, type, -ECONNREFUSED); -} - -void -kibnal_cm_disconnect_callback(kib_conn_t *conn, CM_CONN_INFO *info) -{ - CDEBUG(D_NET, "%s: state %d, status 0x%x\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - conn->ibc_state, info->Status); - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - switch (info->Status) { - default: - LBUG(); - break; - - case FCM_DISCONNECT_REQUEST: - /* Schedule conn to iba_cm_disconnect() if it wasn't already */ - kibnal_close_conn (conn, 0); - break; - - case FCM_DISCONNECT_REPLY: /* peer acks my disconnect req */ - case FCM_DISCONNECTED: /* end of TIME_WAIT */ - CDEBUG(D_NET, "Connection %s disconnected.\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_conn_decref(conn); /* Lose CM's ref */ - break; - } -} - -void -kibnal_cm_passive_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg) -{ - kib_conn_t *conn = arg; - - CDEBUG(D_NET, "status 0x%x\n", info->Status); - - /* Established Connection Notifier */ - switch (info->Status) { - default: - CERROR("Unexpected status %d on Connection %s\n", - 
info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - LBUG(); - break; - - case FCM_CONNECT_TIMEOUT: - kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ETIMEDOUT); - break; - - case FCM_CONNECT_REJECT: - kibnal_check_connreject(conn, IBNAL_CONN_PASSIVE, - &info->Info.Reject); - break; - - case FCM_CONNECT_ESTABLISHED: - kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, 0); - break; - - case FCM_DISCONNECT_REQUEST: - case FCM_DISCONNECT_REPLY: - case FCM_DISCONNECTED: - kibnal_cm_disconnect_callback(conn, info); - break; - } -} - -int -kibnal_accept (kib_conn_t **connp, IB_HANDLE cep, kib_msg_t *msg, int nob) -{ - lnet_nid_t nid; - kib_conn_t *conn; - kib_peer_t *peer; - kib_peer_t *peer2; - unsigned long flags; - int rc; - - rc = kibnal_unpack_msg(msg, 0, nob); - if (rc != 0) { - /* SILENT! kibnal_unpack_msg() complains if required */ - kibnal_reject(LNET_NID_ANY, cep, IBNAL_REJECT_FATAL); - return -EPROTO; - } - - nid = msg->ibm_srcnid; - - if (msg->ibm_version != IBNAL_MSG_VERSION) - CWARN("Connection from %s: old protocol version 0x%x\n", - libcfs_nid2str(nid), msg->ibm_version); - - if (msg->ibm_type != IBNAL_MSG_CONNREQ) { - CERROR("Can't accept %s: bad request type %d (%d expected)\n", - libcfs_nid2str(nid), msg->ibm_type, IBNAL_MSG_CONNREQ); - kibnal_reject(nid, cep, IBNAL_REJECT_FATAL); - return -EPROTO; - } - - if (msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid) { - CERROR("Can't accept %s: bad dst NID %s (%s expected)\n", - libcfs_nid2str(nid), - libcfs_nid2str(msg->ibm_dstnid), - libcfs_nid2str(kibnal_data.kib_ni->ni_nid)); - kibnal_reject(nid, cep, IBNAL_REJECT_FATAL); - return -EPROTO; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE || - msg->ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE || - msg->ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Reject %s: q %d sz %d frag %d, (%d %d %d expected)\n", - libcfs_nid2str(nid), - msg->ibm_u.connparams.ibcp_queue_depth, - 
msg->ibm_u.connparams.ibcp_max_msg_size, - msg->ibm_u.connparams.ibcp_max_frags, - IBNAL_MSG_QUEUE_SIZE, - IBNAL_MSG_SIZE, - IBNAL_MAX_RDMA_FRAGS); - kibnal_reject(nid, cep, IBNAL_REJECT_FATAL); - return -EPROTO; - } - - conn = kibnal_create_conn(nid, msg->ibm_version); - if (conn == NULL) { - kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES); - return -ENOMEM; - } - - /* assume 'nid' is a new peer */ - rc = kibnal_create_peer(&peer, nid); - if (rc != 0) { - kibnal_conn_decref(conn); - kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES); - return -ENOMEM; - } - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - peer2 = kibnal_find_peer_locked(nid); - if (peer2 == NULL) { - /* peer table takes my ref on peer */ - list_add_tail (&peer->ibp_list, kibnal_nid2peerlist(nid)); - LASSERT (peer->ibp_connecting == 0); - } else { - kibnal_peer_decref(peer); - peer = peer2; - - if (peer->ibp_connecting != 0 && - peer->ibp_nid < kibnal_data.kib_ni->ni_nid) { - /* Resolve concurrent connection attempts in favour of - * the higher NID */ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - kibnal_conn_decref(conn); - kibnal_reject(nid, cep, IBNAL_REJECT_CONN_RACE); - return -EALREADY; - } - } - - kibnal_peer_addref(peer); /* +1 ref for conn */ - peer->ibp_accepting++; - - kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING); - conn->ibc_peer = peer; - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBNAL_RX_MSGS); - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - *connp = conn; - return 0; -} - -void -kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg) -{ - - CM_REQUEST_INFO *req = &info->Info.Request; - CM_REPLY_INFO *rep; - kib_conn_t *conn; - FSTATUS frc; - int rc; - - LASSERT(arg == NULL); /* no conn yet for passive */ - - CDEBUG(D_NET, "%x\n", info->Status); - 
- if (info->Status == FCM_CONNECT_CANCEL) { - up(&kibnal_data.kib_listener_signal); - return; - } - - LASSERT (info->Status == FCM_CONNECT_REQUEST); - - rc = kibnal_accept(&conn, cep, (kib_msg_t *)req->PrivateData, - CM_REQUEST_INFO_USER_LEN); - if (rc != 0) /* kibnal_accept has rejected */ - return; - - conn->ibc_cvars->cv_path = req->PathInfo.Path; - - rc = kibnal_conn_rts(conn, - req->CEPInfo.QPN, - req->CEPInfo.OfferedInitiatorDepth, - req->CEPInfo.OfferedResponderResources, - req->CEPInfo.StartingPSN); - if (rc != 0) { - kibnal_reject(conn->ibc_peer->ibp_nid, cep, - IBNAL_REJECT_NO_RESOURCES); - kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ECONNABORTED); - return; - } - - memset(&conn->ibc_cvars->cv_cmci, 0, sizeof(conn->ibc_cvars->cv_cmci)); - rep = &conn->ibc_cvars->cv_cmci.Info.Reply; - - rep->QPN = conn->ibc_cvars->cv_qpattrs.QPNumber; - rep->QKey = conn->ibc_cvars->cv_qpattrs.Qkey; - rep->StartingPSN = conn->ibc_cvars->cv_qpattrs.RecvPSN; - rep->EndToEndFlowControl = conn->ibc_cvars->cv_qpattrs.FlowControl; - rep->ArbInitiatorDepth = conn->ibc_cvars->cv_qpattrs.InitiatorDepth; - rep->ArbResponderResources = conn->ibc_cvars->cv_qpattrs.ResponderResources; - rep->TargetAckDelay = kibnal_data.kib_hca_attrs.LocalCaAckDelay; - rep->FailoverAccepted = IBNAL_FAILOVER_ACCEPTED; - rep->RnRRetryCount = req->CEPInfo.RnrRetryCount; - - CLASSERT (CM_REPLY_INFO_USER_LEN >= - offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)); - - kibnal_pack_connmsg((kib_msg_t *)rep->PrivateData, - conn->ibc_version, - CM_REPLY_INFO_USER_LEN, - IBNAL_MSG_CONNACK, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation); - - LASSERT (conn->ibc_cep == NULL); - kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING); - - frc = iba_cm_accept(cep, - &conn->ibc_cvars->cv_cmci, - NULL, - kibnal_cm_passive_callback, conn, - &conn->ibc_cep); - - if (frc == FSUCCESS || frc == FPENDING) - return; - - CERROR("iba_cm_accept(%s) failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - 
kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ECONNABORTED); -} - -void -kibnal_check_connreply(kib_conn_t *conn, CM_REPLY_INFO *rep) -{ - kib_msg_t *msg = (kib_msg_t *)rep->PrivateData; - lnet_nid_t nid = conn->ibc_peer->ibp_nid; - FSTATUS frc; - int rc; - - rc = kibnal_unpack_msg(msg, conn->ibc_version, CM_REPLY_INFO_USER_LEN); - if (rc != 0) { - CERROR ("Error %d unpacking connack from %s\n", - rc, libcfs_nid2str(nid)); - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO); - return; - } - - if (msg->ibm_type != IBNAL_MSG_CONNACK) { - CERROR("Bad connack request type %d (%d expected) from %s\n", - msg->ibm_type, IBNAL_MSG_CONNREQ, - libcfs_nid2str(msg->ibm_srcnid)); - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO); - return; - } - - if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || - msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR("Stale connack from %s(%s): %s(%s), "LPX64"("LPX64")\n", - libcfs_nid2str(msg->ibm_srcnid), - libcfs_nid2str(conn->ibc_peer->ibp_nid), - libcfs_nid2str(msg->ibm_dstnid), - libcfs_nid2str(kibnal_data.kib_ni->ni_nid), - msg->ibm_dststamp, kibnal_data.kib_incarnation); - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ESTALE); - return; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE || - msg->ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE || - msg->ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Reject %s: q %d sz %d frag %d, (%d %d %d expected)\n", - libcfs_nid2str(msg->ibm_srcnid), - msg->ibm_u.connparams.ibcp_queue_depth, - msg->ibm_u.connparams.ibcp_max_msg_size, - msg->ibm_u.connparams.ibcp_max_frags, - IBNAL_MSG_QUEUE_SIZE, - IBNAL_MSG_SIZE, - IBNAL_MAX_RDMA_FRAGS); - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL); - 
kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO); - return; - } - - CDEBUG(D_NET, "Connection %s REP_RECEIVED.\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBNAL_RX_MSGS); - - rc = kibnal_conn_rts(conn, - rep->QPN, - rep->ArbInitiatorDepth, - rep->ArbResponderResources, - rep->StartingPSN); - if (rc != 0) { - kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_NO_RESOURCES); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EIO); - return; - } - - memset(&conn->ibc_cvars->cv_cmci, 0, sizeof(conn->ibc_cvars->cv_cmci)); - - frc = iba_cm_accept(conn->ibc_cep, - &conn->ibc_cvars->cv_cmci, - NULL, NULL, NULL, NULL); - - if (frc == FCM_CONNECT_ESTABLISHED) { - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, 0); - return; - } - - CERROR("Connection %s CMAccept failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ECONNABORTED); -} - -void -kibnal_cm_active_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg) -{ - kib_conn_t *conn = arg; - - CDEBUG(D_NET, "status 0x%x\n", info->Status); - - switch (info->Status) { - default: - CERROR("unknown status %d on Connection %s\n", - info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - LBUG(); - break; - - case FCM_CONNECT_TIMEOUT: - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ETIMEDOUT); - break; - - case FCM_CONNECT_REJECT: - kibnal_check_connreject(conn, IBNAL_CONN_ACTIVE, - &info->Info.Reject); - break; - - case FCM_CONNECT_REPLY: - kibnal_check_connreply(conn, &info->Info.Reply); - break; - - case FCM_DISCONNECT_REQUEST: - case FCM_DISCONNECT_REPLY: - case FCM_DISCONNECTED: - kibnal_cm_disconnect_callback(conn, info); - break; - } -} - -void -dump_path_records(PATH_RESULTS *results) -{ - IB_PATH_RECORD *path; - int i; - - for (i = 0; i < 
results->NumPathRecords; i++) { - path = &results->PathRecords[i]; - CDEBUG(D_NET, "%d: sgid "LPX64":"LPX64" dgid " - LPX64":"LPX64" pkey %x\n", - i, - path->SGID.Type.Global.SubnetPrefix, - path->SGID.Type.Global.InterfaceID, - path->DGID.Type.Global.SubnetPrefix, - path->DGID.Type.Global.InterfaceID, - path->P_Key); - } -} - -void -kibnal_pathreq_callback (void *arg, QUERY *qry, - QUERY_RESULT_VALUES *qrslt) -{ - IB_CA_ATTRIBUTES *ca_attr = &kibnal_data.kib_hca_attrs; - kib_conn_t *conn = arg; - CM_REQUEST_INFO *req = &conn->ibc_cvars->cv_cmci.Info.Request; - PATH_RESULTS *path = (PATH_RESULTS *)qrslt->QueryResult; - FSTATUS frc; - - if (qrslt->Status != FSUCCESS || - qrslt->ResultDataSize < sizeof(*path)) { - CDEBUG (D_NETERROR, "pathreq %s failed: status %d data size %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - qrslt->Status, qrslt->ResultDataSize); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); - return; - } - - if (path->NumPathRecords < 1) { - CDEBUG (D_NETERROR, "pathreq %s failed: no path records\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); - return; - } - - //dump_path_records(path); - conn->ibc_cvars->cv_path = path->PathRecords[0]; - - LASSERT (conn->ibc_cep == NULL); - - conn->ibc_cep = kibnal_create_cep(conn->ibc_peer->ibp_nid); - if (conn->ibc_cep == NULL) { - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ENOMEM); - return; - } - - memset(req, 0, sizeof(*req)); - req->SID = conn->ibc_cvars->cv_svcrec.RID.ServiceID; - req->CEPInfo.CaGUID = kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx]; - req->CEPInfo.EndToEndFlowControl = IBNAL_EE_FLOW; - req->CEPInfo.PortGUID = conn->ibc_cvars->cv_path.SGID.Type.Global.InterfaceID; - req->CEPInfo.RetryCount = IBNAL_RETRY; - req->CEPInfo.RnrRetryCount = IBNAL_RNR_RETRY; - req->CEPInfo.AckTimeout = IBNAL_ACK_TIMEOUT; - req->CEPInfo.StartingPSN = IBNAL_STARTING_PSN; - req->CEPInfo.QPN = conn->ibc_cvars->cv_qpattrs.QPNumber; - 
req->CEPInfo.QKey = conn->ibc_cvars->cv_qpattrs.Qkey; - req->CEPInfo.OfferedResponderResources = ca_attr->MaxQPResponderResources; - req->CEPInfo.OfferedInitiatorDepth = ca_attr->MaxQPInitiatorDepth; - req->PathInfo.bSubnetLocal = IBNAL_LOCAL_SUB; - req->PathInfo.Path = conn->ibc_cvars->cv_path; - - CLASSERT (CM_REQUEST_INFO_USER_LEN >= - offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t)); - - kibnal_pack_connmsg((kib_msg_t *)req->PrivateData, - conn->ibc_version, - CM_REQUEST_INFO_USER_LEN, - IBNAL_MSG_CONNREQ, - conn->ibc_peer->ibp_nid, 0); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto test */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - ((kib_msg_t *)req->PrivateData)->ibm_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - ((kib_msg_t *)req->PrivateData)->ibm_magic = - LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - /* Flag I'm getting involved with the CM... 
*/ - kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING); - - /* cm callback gets my conn ref */ - frc = iba_cm_connect(conn->ibc_cep, req, - kibnal_cm_active_callback, conn); - if (frc == FPENDING || frc == FSUCCESS) - return; - - CERROR ("Connect %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); -} - -void -kibnal_dump_service_records(SERVICE_RECORD_RESULTS *results) -{ - IB_SERVICE_RECORD *svc; - int i; - - for (i = 0; i < results->NumServiceRecords; i++) { - svc = &results->ServiceRecords[i]; - CDEBUG(D_NET, "%d: sid "LPX64" gid "LPX64":"LPX64" pkey %x\n", - i, - svc->RID.ServiceID, - svc->RID.ServiceGID.Type.Global.SubnetPrefix, - svc->RID.ServiceGID.Type.Global.InterfaceID, - svc->RID.ServiceP_Key); - } -} - -void -kibnal_service_get_callback (void *arg, QUERY *qry, - QUERY_RESULT_VALUES *qrslt) -{ - kib_conn_t *conn = arg; - SERVICE_RECORD_RESULTS *svc; - FSTATUS frc; - - if (qrslt->Status != FSUCCESS || - qrslt->ResultDataSize < sizeof(*svc)) { - CDEBUG (D_NETERROR, "Lookup %s failed: status %d data size %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - qrslt->Status, qrslt->ResultDataSize); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); - return; - } - - svc = (SERVICE_RECORD_RESULTS *)qrslt->QueryResult; - if (svc->NumServiceRecords < 1) { - CDEBUG (D_NETERROR, "lookup %s failed: no service records\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); - return; - } - - //kibnal_dump_service_records(svc); - conn->ibc_cvars->cv_svcrec = svc->ServiceRecords[0]; - - qry = &conn->ibc_cvars->cv_query; - memset(qry, 0, sizeof(*qry)); - - qry->OutputType = OutputTypePathRecord; - qry->InputType = InputTypePortGuidPair; - - qry->InputValue.PortGuidPair.SourcePortGuid = - kibnal_data.kib_port_guid; - qry->InputValue.PortGuidPair.DestPortGuid = - conn->ibc_cvars->cv_svcrec.RID.ServiceGID.Type.Global.InterfaceID; - 
- /* kibnal_pathreq_callback gets my conn ref */ - frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - qry, - kibnal_pathreq_callback, - &kibnal_data.kib_sdretry, - conn); - if (frc == FPENDING) - return; - - CERROR ("pathreq %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); -} - -void -kibnal_connect_peer (kib_peer_t *peer) -{ - QUERY *qry; - FSTATUS frc; - kib_conn_t *conn; - - LASSERT (peer->ibp_connecting != 0); - - conn = kibnal_create_conn(peer->ibp_nid, peer->ibp_version); - if (conn == NULL) { - CERROR ("Can't allocate conn\n"); - kibnal_peer_connect_failed(peer, IBNAL_CONN_ACTIVE, -ENOMEM); - return; - } - - conn->ibc_peer = peer; - kibnal_peer_addref(peer); - - qry = &conn->ibc_cvars->cv_query; - memset(qry, 0, sizeof(*qry)); - - qry->OutputType = OutputTypeServiceRecord; - qry->InputType = InputTypeServiceRecord; - - qry->InputValue.ServiceRecordValue.ComponentMask = - KIBNAL_SERVICE_KEY_MASK; - kibnal_set_service_keys( - &qry->InputValue.ServiceRecordValue.ServiceRecord, - peer->ibp_nid); - - /* kibnal_service_get_callback gets my conn ref */ - frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd, - kibnal_data.kib_port_guid, - qry, - kibnal_service_get_callback, - &kibnal_data.kib_sdretry, - conn); - if (frc == FPENDING) - return; - - CERROR("Lookup %s failed: %d\n", libcfs_nid2str(peer->ibp_nid), frc); - kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH); -} - -int -kibnal_check_txs (kib_conn_t *conn, struct list_head *txs) -{ - kib_tx_t *tx; - struct list_head *ttmp; - int timed_out = 0; - - spin_lock(&conn->ibc_lock); - - list_for_each (ttmp, txs) { - tx = list_entry (ttmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - if (time_after_eq (jiffies, tx->tx_deadline)) { - timed_out = 1; - 
break; - } - } - - spin_unlock(&conn->ibc_lock); - return timed_out; -} - -int -kibnal_conn_timed_out (kib_conn_t *conn) -{ - return kibnal_check_txs(conn, &conn->ibc_tx_queue) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) || - kibnal_check_txs(conn, &conn->ibc_active_txs); -} - -void -kibnal_check_peers (int idx) -{ - rwlock_t *rwlock = &kibnal_data.kib_global_lock; - struct list_head *peers = &kibnal_data.kib_peers[idx]; - struct list_head *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - - again: - /* NB. We expect to have a look at all the peers and not find any - * rdmas to time out, so we just use a shared lock while we - * take a look... */ - read_lock_irqsave(rwlock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - - if (peer->ibp_passivewait) { - LASSERT (list_empty(&peer->ibp_conns)); - - if (!time_after_eq(jiffies, - peer->ibp_passivewait_deadline)) - continue; - - kibnal_peer_addref(peer); /* ++ ref for me... */ - read_unlock_irqrestore(rwlock, flags); - - kibnal_peer_connect_failed(peer, IBNAL_CONN_WAITING, - -ETIMEDOUT); - kibnal_peer_decref(peer); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - - list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ - kibnal_check_sends(conn); - - if (!kibnal_conn_timed_out(conn)) - continue; - - /* Handle timeout by closing the whole connection. We - * can only be sure RDMA activity has ceased once the - * QP has been modified. */ - - kibnal_conn_addref(conn); /* 1 ref for me... 
*/ - - read_unlock_irqrestore(rwlock, flags); - - CERROR("Timed out RDMA with %s\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_close_conn (conn, -ETIMEDOUT); - kibnal_conn_decref(conn); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(rwlock, flags); -} - -void -kibnal_disconnect_conn (kib_conn_t *conn) -{ - FSTATUS frc; - - LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTING); - - kibnal_conn_disconnected(conn); - - frc = iba_cm_disconnect(conn->ibc_cep, NULL, NULL); - switch (frc) { - case FSUCCESS: - break; - - case FINSUFFICIENT_RESOURCES: - CERROR("ENOMEM disconnecting %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* This might cause the module to become unloadable since the - * FCM_DISCONNECTED callback is still outstanding */ - break; - - default: - CERROR("Unexpected error disconnecting %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), frc); - LBUG(); - } - - kibnal_peer_notify(conn->ibc_peer); -} - -int -kibnal_connd (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - kib_conn_t *conn; - kib_peer_t *peer; - int timeout; - int i; - int did_something; - int peer_index = 0; - unsigned long deadline = jiffies; - - cfs_daemonize ("kibnal_connd"); - cfs_block_allsigs (); - - init_waitqueue_entry (&wait, current); - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - while (!kibnal_data.kib_shutdown) { - did_something = 0; - - if (!list_empty (&kibnal_data.kib_connd_zombies)) { - conn = list_entry (kibnal_data.kib_connd_zombies.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - did_something = 1; - - kibnal_destroy_conn(conn); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_conns)) { - conn = list_entry (kibnal_data.kib_connd_conns.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - spin_unlock_irqrestore 
(&kibnal_data.kib_connd_lock, flags); - did_something = 1; - - kibnal_disconnect_conn(conn); - kibnal_conn_decref(conn); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_peers)) { - peer = list_entry (kibnal_data.kib_connd_peers.next, - kib_peer_t, ibp_connd_list); - - list_del_init (&peer->ibp_connd_list); - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - did_something = 1; - - kibnal_connect_peer (peer); - kibnal_peer_decref (peer); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - /* careful with the jiffy wrap... */ - while ((timeout = (int)(deadline - jiffies)) <= 0) { - const int n = 4; - const int p = 1; - int chunk = kibnal_data.kib_peer_hash_size; - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
*/ - - if (*kibnal_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kibnal_tunables.kib_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kibnal_check_peers (peer_index); - peer_index = (peer_index + 1) % - kibnal_data.kib_peer_hash_size; - } - - deadline += p * HZ; - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - did_something = 1; - } - - if (did_something) - continue; - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - - if (!kibnal_data.kib_shutdown && - list_empty (&kibnal_data.kib_connd_conns) && - list_empty (&kibnal_data.kib_connd_peers)) - schedule_timeout (timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_thread_fini (); - return (0); -} - - -void -kibnal_hca_async_callback (void *hca_arg, IB_EVENT_RECORD *ev) -{ - /* XXX flesh out. 
this seems largely for async errors */ - CERROR("type: %d code: %u\n", ev->EventType, ev->EventCode); -} - -void -kibnal_hca_callback (void *hca_arg, void *cq_arg) -{ - unsigned long flags; - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - kibnal_data.kib_ready = 1; - wake_up(&kibnal_data.kib_sched_waitq); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); -} - -int -kibnal_scheduler(void *arg) -{ - long id = (long)arg; - wait_queue_t wait; - char name[16]; - FSTATUS frc; - FSTATUS frc2; - IB_WORK_COMPLETION wc; - kib_rx_t *rx; - unsigned long flags; - __u64 rxseq = 0; - int busy_loops = 0; - - snprintf(name, sizeof(name), "kibnal_sd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - - while (!kibnal_data.kib_shutdown) { - if (busy_loops++ >= IBNAL_RESCHED) { - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - our_cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - } - - if (kibnal_data.kib_ready && - !kibnal_data.kib_checking_cq) { - /* take ownership of completion polling */ - kibnal_data.kib_checking_cq = 1; - /* Assume I'll exhaust the CQ */ - kibnal_data.kib_ready = 0; - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - frc = iba_poll_cq(kibnal_data.kib_cq, &wc); - if (frc == FNOT_DONE) { - /* CQ empty */ - frc2 = iba_rearm_cq(kibnal_data.kib_cq, - CQEventSelNextWC); - LASSERT (frc2 == FSUCCESS); - } - - if (frc == FSUCCESS && - kibnal_wreqid2type(wc.WorkReqId) == IBNAL_WID_RX) { - rx = (kib_rx_t *)kibnal_wreqid2ptr(wc.WorkReqId); - - /* Grab the RX sequence number NOW before - * anyone else can get an RX completion */ - rxseq = rx->rx_conn->ibc_rxseq++; - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - /* give up ownership of completion polling */ - kibnal_data.kib_checking_cq = 0; - - if (frc == FNOT_DONE) - continue; - - LASSERT (frc == 
FSUCCESS); - /* Assume there's more: get another scheduler to check - * while I handle this completion... */ - - kibnal_data.kib_ready = 1; - wake_up(&kibnal_data.kib_sched_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - switch (kibnal_wreqid2type(wc.WorkReqId)) { - case IBNAL_WID_RX: - kibnal_rx_complete(&wc, rxseq); - break; - - case IBNAL_WID_TX: - kibnal_tx_complete(&wc); - break; - - case IBNAL_WID_RDMA: - /* We only get RDMA completion notification if - * it fails. So we just ignore them completely - * because... - * - * 1) If an RDMA fails, all subsequent work - * items, including the final SEND will fail - * too, so I'm still guaranteed to notice that - * this connection is hosed. - * - * 2) It's positively dangerous to look inside - * the tx descriptor obtained from an RDMA work - * item. As soon as I drop the kib_sched_lock, - * I give a scheduler on another CPU a chance - * to get the final SEND completion, so the tx - * descriptor can get freed as I inspect it. */ - CERROR ("RDMA failed: %d\n", wc.Status); - break; - - default: - LBUG(); - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - continue; - } - - /* Nothing to do; sleep... 
*/ - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kibnal_data.kib_sched_waitq, &wait); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - schedule(); - - remove_wait_queue(&kibnal_data.kib_sched_waitq, &wait); - set_current_state(TASK_RUNNING); - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - } - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - - kibnal_thread_fini(); - return (0); -} diff --git a/lnet/klnds/iiblnd/iiblnd_modparams.c b/lnet/klnds/iiblnd/iiblnd_modparams.c deleted file mode 100644 index ceb6e5d2ae65fdfd46745232baf2ce990897af2f..0000000000000000000000000000000000000000 --- a/lnet/klnds/iiblnd/iiblnd_modparams.c +++ /dev/null @@ -1,179 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "iiblnd.h" - -static char *ipif_basename = "ib"; -CFS_MODULE_PARM(ipif_basename, "s", charp, 0444, - "IPoIB interface base name"); - -static char *service_name = "iiblnd"; -CFS_MODULE_PARM(service_name, "s", charp, 0444, - "IB service name"); - -static int service_number = 0x11b9a2; -CFS_MODULE_PARM(service_number, "i", int, 0444, - "IB service number"); - -static int min_reconnect_interval = 1; -CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644, - "minimum connection retry interval (seconds)"); - -static int max_reconnect_interval = 60; -CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644, - "maximum connection retry interval (seconds)"); - -static int concurrent_peers = 1152; -CFS_MODULE_PARM(concurrent_peers, "i", int, 0444, - "maximum number of peers that may connect"); - -static int cksum = 0; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of message descriptors"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int sd_retries = 8; -CFS_MODULE_PARM(sd_retries, "i", int, 0444, - "# times to retry SD queries"); - -static int keepalive = 100; -CFS_MODULE_PARM(keepalive, "i", int, 0644, - "Idle time in seconds before sending a keepalive"); - -static int concurrent_sends = IBNAL_RX_MSGS; -CFS_MODULE_PARM(concurrent_sends, "i", int, 0644, - "Send work queue sizing"); - -kib_tunables_t kibnal_tunables = { - .kib_ipif_basename = &ipif_basename, - .kib_service_name = &service_name, - .kib_service_number = &service_number, - .kib_min_reconnect_interval = &min_reconnect_interval, - .kib_max_reconnect_interval = &max_reconnect_interval, - .kib_concurrent_peers = 
&concurrent_peers, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_keepalive = &keepalive, - .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peercredits = &peer_credits, - .kib_sd_retries = &sd_retries, - .kib_concurrent_sends = &concurrent_sends, -}; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - -/* NB max_size specified for proc_dostring entries only needs to be big enough - * not to truncate the printout; it only needs to be the actual size of the - * string buffer if we allow writes (and we don't) */ - -static ctl_table kibnal_ctl_table[] = { - {1, "ipif_basename", &ipif_basename, - 1024, 0444, NULL, &proc_dostring}, - {2, "service_name", &service_name, - 1024, 0444, NULL, &proc_dostring}, - {3, "service_number", &service_number, - sizeof(int), 0444, NULL, &proc_dointvec}, - {4, "min_reconnect_interval", &min_reconnect_interval, - sizeof(int), 0644, NULL, &proc_dointvec}, - {5, "max_reconnect_interval", &max_reconnect_interval, - sizeof(int), 0644, NULL, &proc_dointvec}, - {6, "concurrent_peers", &concurrent_peers, - sizeof(int), 0444, NULL, &proc_dointvec}, - {7, "cksum", &cksum, - sizeof(int), 0644, NULL, &proc_dointvec}, - {8, "timeout", &timeout, - sizeof(int), 0644, NULL, &proc_dointvec}, - {9, "ntx", &ntx, - sizeof(int), 0444, NULL, &proc_dointvec}, - {10, "credits", &credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {11, "peer_credits", &peer_credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {12, "sd_retries", &sd_retries, - sizeof(int), 0444, NULL, &proc_dointvec}, - {13, "keepalive", &keepalive, - sizeof(int), 0644, NULL, &proc_dointvec}, - {14, "concurrent_sends", &concurrent_sends, - sizeof(int), 0644, NULL, &proc_dointvec}, - {0} -}; - -static ctl_table kibnal_top_ctl_table[] = { - {203, "openibnal", NULL, 0, 0555, kibnal_ctl_table}, - {0} -}; - -int -kibnal_tunables_init () -{ - kibnal_tunables.kib_sysctl = - register_sysctl_table(kibnal_top_ctl_table, 0); - - if (kibnal_tunables.kib_sysctl == NULL) - CWARN("Can't setup 
/proc tunables\n"); - - if (*kibnal_tunables.kib_concurrent_sends > IBNAL_RX_MSGS) - *kibnal_tunables.kib_concurrent_sends = IBNAL_RX_MSGS; - if (*kibnal_tunables.kib_concurrent_sends < IBNAL_MSG_QUEUE_SIZE) - *kibnal_tunables.kib_concurrent_sends = IBNAL_MSG_QUEUE_SIZE; - - return 0; -} - -void -kibnal_tunables_fini () -{ - if (kibnal_tunables.kib_sysctl != NULL) - unregister_sysctl_table(kibnal_tunables.kib_sysctl); -} - -#else - -int -kibnal_tunables_init () -{ - return 0; -} - -void -kibnal_tunables_fini () -{ -} - -#endif diff --git a/lnet/klnds/mxlnd/.cvsignore b/lnet/klnds/mxlnd/.cvsignore deleted file mode 100644 index 26bf56c4c271a5a4da943cb6c1beaebe3c8bdf49..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend - diff --git a/lnet/klnds/mxlnd/Makefile.in b/lnet/klnds/mxlnd/Makefile.in deleted file mode 100644 index 378dbdd346591d0a9ac89ae7d651ed2c8b40e9b8..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kmxlnd -kmxlnd-objs := mxlnd.o mxlnd_cb.o mxlnd_modparams.o - -EXTRA_POST_CFLAGS := @MXCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/mxlnd/README b/lnet/klnds/mxlnd/README deleted file mode 100644 index cc87e7a0345058e8ad297bc606effded11d13f68..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/README +++ /dev/null @@ -1,190 +0,0 @@ -************************************************************************* -* * -* Myrinet Express Lustre Networking Driver (MXLND) documentation * -* * -************************************************************************* - -README of MXLND - -MXLND provides support for Myricom's Myrinet Express (MX) communication -layer in Lustre. - -MXLND may be used with either MX-10G or MX-2G. See MX's README for -supported NICs. - -Table of Contents: - I. Installation - 1. 
Configuring and compiling - 2. Module Parameters - II. MXLND Performance - III. Caveats - 1. Systems with different page sizes - 2. Multi-homing - 3. MX endpoint collision - IV. License - V. Support - -================ -I. Installation -================ - -MXLND is supported on Linux 2.6. It may be possible to run it on 2.4, -but it has not been tested. MXLND requires Myricom's MX version 1.2.1 -or higher. See MX's README for the supported list of processors. - -1. Configuring and compiling - -MXLND should be already integrated into the Lustre build process. To -build MXLND, you will need to set the path to your MX installation -in Lustre's ./configure: - - --with-mx=/opt/mx - -replacing /opt with the actual path. Configure will check to ensure that -the MX version has the required functions. If not, it will fail to build. -To check if MXLND built, look for: - - checking whether to enable Myrinet MX support... yes - -in configure's output or the presence of Makefile in -$LUSTRE/lnet/klnds/mxlnd. - -2. Module Parameters - -MXLND supports a number of load-time parameters using Linux's module -parameter system. On our test systems, we created the following file: - - /etc/modprobe.d/kmxlnd - -On some (older?) systems, you may need to modify /etc/modprobe.conf. - -The available options are: - - n_waitd # of completion daemons - max_peers maximum number of peers that may connect - cksum set non-zero to enable small message (< 4KB) checksums - ntx # of total tx message descriptors - credits # concurrent sends to a single peer - board index value of the Myrinet board (NIC) - ep_id MX endpoint ID - polling Use 0 to block (wait). A value > 0 will poll that many times before blocking - hosts IP-to-hostname resolution file - -Of these, only hosts is required. It must be the absolute path to the -MXLND hosts file. 
For example: - - options kmxlnd hosts=/etc/hosts.mxlnd - -The file format for the hosts file is as follows: - -IP HOST BOARD EP_ID - -The values must be space and/or tab separated where: - - IP is a valid IPv4 address - HOST is the name returned by `hostname` on that machine - BOARD is the index of the Myricom NIC (0 for the first card, etc.) - EP_ID is the MX endpoint ID - -You may want to vary the remaining options to obtain the optimal performance -for your platform. - - n_waitd sets the number of threads that process completed MX requests -(sends and receives). In our testing, the default of 1 performed best. - - max_peers tells MXLND the upper limit of machines that it will need to -communicate with. This affects how many receives it will pre-post and each -receive will use one page of memory. Ideally, on clients, this value will -be equal to the total number of Lustre servers (MDS and OSS). On servers, -it needs to equal the total number of machines in the storage system. - - cksum turns on small message checksums. It can be used to aid in trouble- -shooting. MX also provides an optional checksumming feature which can check -all messages (large and small). See the MX README for details. - - ntx is the number of total sends in flight from this machine. In actuality, -MXLND reserves half of them for connect messages so make this value twice as large -as you want for the total number of sends in flight. - - credits is the number of in-flight messages for a specific peer. This is part -of the flow-control system in Lustre. Increasing this value may improve performance -but it requires more memory since each message requires at least one page. - - board is the index of the Myricom NIC. Hosts can have multiple Myricom NICs -and this identifies which one MXLND should use. This value must match the board -value in your MXLND hosts file for this host. - - ep_id is the MX endpoint ID. 
Each process that uses MX is required to have at -least one MX endpoint to access the MX library and NIC. The ID is a simple index -starting at 0. This value must match the endpoint ID value in your MXLND hosts -file for this host. - - polling determines whether this host will poll or block for MX request com- -pletions. A value of 0 blocks and any positive value will poll that many times -before blocking. Since polling increases CPU usage, we suggest you set this to -0 on the client and experiment with different values for servers. - -===================== -II. MXLND Performance -===================== - -On MX-2G systems, MXLND should easily saturate the link and use minimal CPU -(5-10% for read and write operations). On MX-10G systems, MXLND can saturate -the link and use moderate CPU resources (20-30% for read and write operations). -MX-10G relies on PCI-Express which is relatively new and performance varies -considerably by processor, motherboard and PCI-E chipset. Refer to Myricom's -website for the latest DMA read/write performance results by motherboard. The -DMA results will place an upper-bound on MXLND performance. - -============ -III. Caveats -============ - -1. Systems with different page sizes - -MXLND will set the maximum small message size equal to the kernel's page size. -This means that machines running MXLND that have different page sizes are not -able to communicate with each other. If you wish to run MXLND in this case, -send email to help@myri.com. - -2. Multi-homing - -At this time, the MXLND cannot drive more than one interface at a time. Thus, -a single Lustre router cannot route between two MX-10G, between two MX-2G, or -between MX-10G and MX-2G fabrics. - -3. MX endpoint collision - -Each process that uses MX is required to have at least one MX endpoint to -access the MX library and NIC. Other processes may need to use MX and no two -processes can use the same endpoint ID. 
MPICH-MX dynamically chooses one at -MPI startup and should not interfere with MXLND. Sockets-MX, on the other hand, -is hard coded to use 0 for its ID. If it is possible that anyone will want to -run Sockets-MX on this system, use a non-0 value for MXLND's endpoint ID. - - -=========== -IV. License -=========== - -MXLND is copyright (C) 2006 of Myricom, Inc. - -MXLND is part of Lustre, http://www.lustre.org. - -MXLND is free software; you can redistribute it and/or modify it under the -terms of version 2 of the GNU General Public License as published by the Free -Software Foundation. - -MXLND is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A -PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, -Cambridge, MA 02139, USA. - -========== -V. Support -========== - -If you have questions about MXLND, please contact help@myri.com. diff --git a/lnet/klnds/mxlnd/autoMakefile.am b/lnet/klnds/mxlnd/autoMakefile.am deleted file mode 100644 index 1d94f8680f46d3f65995d923e6441016d8826696..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_MXLND -modulenet_DATA = kmxlnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kmxlnd-objs:%.o=%.c) mxlnd.h diff --git a/lnet/klnds/mxlnd/mxlnd.c b/lnet/klnds/mxlnd/mxlnd.c deleted file mode 100644 index d4287ba2be004b7b3a8d28cd95d6153b07c67171..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd.c +++ /dev/null @@ -1,920 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Scott Atchley <atchley at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "mxlnd.h" - -lnd_t the_kmxlnd = { - .lnd_type = MXLND, - .lnd_startup = mxlnd_startup, - .lnd_shutdown = mxlnd_shutdown, - .lnd_ctl = mxlnd_ctl, - .lnd_send = mxlnd_send, - .lnd_recv = mxlnd_recv, -}; - -kmx_data_t kmxlnd_data; - -/** - * mxlnd_ctx_free - free ctx struct - * @ctx - a kmx_peer pointer - * - * The calling function should remove the ctx from the ctx list first - * then free it. 
- */ -void -mxlnd_ctx_free(struct kmx_ctx *ctx) -{ - if (ctx == NULL) return; - - if (ctx->mxc_page != NULL) { - __free_page(ctx->mxc_page); - spin_lock(&kmxlnd_data.kmx_global_lock); - kmxlnd_data.kmx_mem_used -= MXLND_EAGER_SIZE; - spin_unlock(&kmxlnd_data.kmx_global_lock); - } - - if (ctx->mxc_seg_list != NULL) { - LASSERT(ctx->mxc_nseg > 0); - MXLND_FREE(ctx->mxc_seg_list, ctx->mxc_nseg * sizeof(mx_ksegment_t)); - } - - MXLND_FREE (ctx, sizeof (*ctx)); - return; -} - -/** - * mxlnd_ctx_alloc - allocate and initialize a new ctx struct - * @ctxp - address of a kmx_ctx pointer - * - * Returns 0 on success and -EINVAL, -ENOMEM on failure - */ -int -mxlnd_ctx_alloc(struct kmx_ctx **ctxp, enum kmx_req_type type) -{ - int ret = 0; - struct kmx_ctx *ctx = NULL; - - if (ctxp == NULL) return -EINVAL; - - MXLND_ALLOC(ctx, sizeof (*ctx)); - if (ctx == NULL) { - CDEBUG(D_NETERROR, "Cannot allocate ctx\n"); - return -ENOMEM; - } - memset(ctx, 0, sizeof(*ctx)); - spin_lock_init(&ctx->mxc_lock); - - ctx->mxc_type = type; - ctx->mxc_page = alloc_page (GFP_KERNEL); - if (ctx->mxc_page == NULL) { - CDEBUG(D_NETERROR, "Can't allocate page\n"); - ret = -ENOMEM; - goto failed; - } - spin_lock(&kmxlnd_data.kmx_global_lock); - kmxlnd_data.kmx_mem_used += MXLND_EAGER_SIZE; - spin_unlock(&kmxlnd_data.kmx_global_lock); - ctx->mxc_msg = (struct kmx_msg *)((char *)page_address(ctx->mxc_page)); - ctx->mxc_seg.segment_ptr = MX_PA_TO_U64(lnet_page2phys(ctx->mxc_page)); - ctx->mxc_state = MXLND_CTX_IDLE; - - *ctxp = ctx; - return 0; - -failed: - mxlnd_ctx_free(ctx); - return ret; -} - -/** - * mxlnd_ctx_init - reset ctx struct to the default values - * @ctx - a kmx_ctx pointer - */ -void -mxlnd_ctx_init(struct kmx_ctx *ctx) -{ - if (ctx == NULL) return; - - /* do not change mxc_type */ - ctx->mxc_incarnation = 0; - ctx->mxc_deadline = 0; - ctx->mxc_state = MXLND_CTX_IDLE; - /* ignore mxc_global_list */ - if (ctx->mxc_list.next != NULL && !list_empty(&ctx->mxc_list)) { - if (ctx->mxc_peer != 
NULL) - spin_lock(&ctx->mxc_lock); - list_del_init(&ctx->mxc_list); - if (ctx->mxc_peer != NULL) - spin_unlock(&ctx->mxc_lock); - } - /* ignore mxc_rx_list */ - /* ignore mxc_lock */ - ctx->mxc_nid = 0; - ctx->mxc_peer = NULL; - ctx->mxc_conn = NULL; - /* ignore mxc_msg */ - /* ignore mxc_page */ - ctx->mxc_lntmsg[0] = NULL; - ctx->mxc_lntmsg[1] = NULL; - ctx->mxc_msg_type = 0; - ctx->mxc_cookie = 0LL; - ctx->mxc_match = 0LL; - /* ctx->mxc_seg.segment_ptr points to mxc_page */ - ctx->mxc_seg.segment_length = 0; - if (ctx->mxc_seg_list != NULL) { - LASSERT(ctx->mxc_nseg > 0); - MXLND_FREE(ctx->mxc_seg_list, ctx->mxc_nseg * sizeof(mx_ksegment_t)); - } - ctx->mxc_seg_list = NULL; - ctx->mxc_nseg = 0; - ctx->mxc_nob = 0; - ctx->mxc_mxreq = NULL; - memset(&ctx->mxc_status, 0, sizeof(mx_status_t)); - /* ctx->mxc_get */ - /* ctx->mxc_put */ - - ctx->mxc_msg->mxm_type = 0; - ctx->mxc_msg->mxm_credits = 0; - ctx->mxc_msg->mxm_nob = 0; - ctx->mxc_msg->mxm_seq = 0; - - return; -} - -/** - * mxlnd_free_txs - free kmx_txs and associated pages - * - * Called from mxlnd_shutdown() - */ -void -mxlnd_free_txs(void) -{ - struct kmx_ctx *tx = NULL; - struct kmx_ctx *next = NULL; - - list_for_each_entry_safe(tx, next, &kmxlnd_data.kmx_txs, mxc_global_list) { - list_del_init(&tx->mxc_global_list); - mxlnd_ctx_free(tx); - } - return; -} - -/** - * mxlnd_init_txs - allocate tx descriptors then stash on txs and idle tx lists - * - * Called from mxlnd_startup() - * returns 0 on success, else -ENOMEM - */ -int -mxlnd_init_txs(void) -{ - int ret = 0; - int i = 0; - struct kmx_ctx *tx = NULL; - - for (i = 0; i < *kmxlnd_tunables.kmx_ntx; i++) { - ret = mxlnd_ctx_alloc(&tx, MXLND_REQ_TX); - if (ret != 0) { - mxlnd_free_txs(); - return ret; - } - mxlnd_ctx_init(tx); - /* in startup(), no locks required */ - list_add_tail(&tx->mxc_global_list, &kmxlnd_data.kmx_txs); - list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle); - } - return 0; -} - -/** - * mxlnd_free_rxs - free initial kmx_rx 
descriptors and associated pages - * - * Called from mxlnd_shutdown() - */ -void -mxlnd_free_rxs(void) -{ - struct kmx_ctx *rx = NULL; - struct kmx_ctx *next = NULL; - - list_for_each_entry_safe(rx, next, &kmxlnd_data.kmx_rxs, mxc_global_list) { - list_del_init(&rx->mxc_global_list); - mxlnd_ctx_free(rx); - } - return; -} - -/** - * mxlnd_init_rxs - allocate initial rx descriptors - * - * Called from startup(). We create MXLND_MAX_PEERS plus MXLND_NTX - * rx descriptors. We create one for each potential peer to handle - * the initial connect request. We create on for each tx in case the - * send requires a non-eager receive. - * - * Returns 0 on success, else -ENOMEM - */ -int -mxlnd_init_rxs(void) -{ - int ret = 0; - int i = 0; - struct kmx_ctx *rx = NULL; - - for (i = 0; i < (*kmxlnd_tunables.kmx_ntx + *kmxlnd_tunables.kmx_max_peers); i++) { - ret = mxlnd_ctx_alloc(&rx, MXLND_REQ_RX); - if (ret != 0) { - mxlnd_free_rxs(); - return ret; - } - mxlnd_ctx_init(rx); - /* in startup(), no locks required */ - list_add_tail(&rx->mxc_global_list, &kmxlnd_data.kmx_rxs); - list_add_tail(&rx->mxc_list, &kmxlnd_data.kmx_rx_idle); - } - return 0; -} - -/** - * mxlnd_free_peers - free peers - * - * Called from mxlnd_shutdown() - */ -void -mxlnd_free_peers(void) -{ - int i = 0; - struct kmx_peer *peer = NULL; - struct kmx_peer *next = NULL; - - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry_safe(peer, next, &kmxlnd_data.kmx_peers[i], mxp_peers) { - list_del_init(&peer->mxp_peers); - if (peer->mxp_conn) mxlnd_conn_decref(peer->mxp_conn); - mxlnd_peer_decref(peer); - } - } -} - -int -mxlnd_host_alloc(struct kmx_host **hostp) -{ - struct kmx_host *host = NULL; - - MXLND_ALLOC(host, sizeof (*host)); - if (host == NULL) { - CDEBUG(D_NETERROR, "Cannot allocate host\n"); - return -1; - } - memset(host, 0, sizeof(*host)); - spin_lock_init(&host->mxh_lock); - - *hostp = host; - - return 0; -} - -void -mxlnd_host_free(struct kmx_host *host) -{ - if (host == NULL) return; - 
- if (host->mxh_hostname != NULL) - MXLND_FREE(host->mxh_hostname, strlen(host->mxh_hostname) + 1); - - MXLND_FREE(host, sizeof(*host)); - return; -} - -/** - * mxlnd_free_hosts - free kmx_hosts - * - * Called from mxlnd_shutdown() - */ -void -mxlnd_free_hosts(void) -{ - struct kmx_host *host = NULL; - struct kmx_host *next = NULL; - - list_for_each_entry_safe(host, next, &kmxlnd_data.kmx_hosts, mxh_list) { - list_del_init(&host->mxh_list); - mxlnd_host_free(host); - } - return; -} - -#define xstr(s) #s -#define str(s) xstr(s) -#define MXLND_MAX_BOARD 4 /* we expect hosts to have fewer NICs than this */ -#define MXLND_MAX_EP_ID 16 /* we expect hosts to have less than this endpoints */ - -/* this parses a line that consists of: - * - * IP HOSTNAME BOARD ENDPOINT ID - * 169.192.0.113 mds01 0 3 - * - * By default MX uses the alias (short hostname). If you override - * it using mx_hostname to use the FQDN or some other name, the hostname - * here must match exactly. - */ - -/* MX_MAX_HOSTNAME_LEN = 80. See myriexpress.h */ -int -mxlnd_parse_line(char *line) -{ - int i = 0; - int ret = 0; - int len = 0; - u32 ip[4] = { 0, 0, 0, 0 }; - char hostname[MX_MAX_HOSTNAME_LEN]; - u32 board = -1; - u32 ep_id = -1; - struct kmx_host *host = NULL; - - if (line == NULL) return -1; - - len = strlen(line); - - if (len == 0) return -1; - - /* convert tabs to spaces */ - for (i = 0; i < len; i++) { - if (line[i] == '\t') line[i] = ' '; - } - - memset(&hostname, 0 , sizeof(hostname)); - ret = sscanf(line, "%d.%d.%d.%d %" str(MX_MAX_HOSTNAME_LEN) "s %d %d", - &ip[0], &ip[1], &ip[2], &ip[3], hostname, &board, &ep_id); - - if (ret != 7) { - return -1; - } - - /* check for valid values */ - /* we assume a valid IP address (all <= 255), number of NICs, - * and number of endpoint IDs */ - if (ip[0] > 255 || ip [1] > 255 || ip[2] > 255 || ip[3] > 255 || - board > MXLND_MAX_BOARD || ep_id > MXLND_MAX_EP_ID) { - CDEBUG(D_NETERROR, "Illegal value in \"%s\". 
Ignoring " - "this host.\n", line); - return -1; - } - - ret = mxlnd_host_alloc(&host); - if (ret != 0) return -1; - - host->mxh_addr = ((ip[0]<<24)|(ip[1]<<16)|(ip[2]<<8)|ip[3]); - len = strlen(hostname); - MXLND_ALLOC(host->mxh_hostname, len + 1); - memset(host->mxh_hostname, 0, len + 1); - strncpy(host->mxh_hostname, hostname, len); - host->mxh_board = board; - host->mxh_ep_id = ep_id; - - spin_lock(&kmxlnd_data.kmx_hosts_lock); - list_add_tail(&host->mxh_list, &kmxlnd_data.kmx_hosts); - spin_unlock(&kmxlnd_data.kmx_hosts_lock); - - return 0; -} - -void -mxlnd_print_hosts(void) -{ -#if MXLND_DEBUG - struct kmx_host *host = NULL; - - list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) { - int ip[4]; - u32 addr = host->mxh_addr; - - ip[0] = (addr >> 24) & 0xff; - ip[1] = (addr >> 16) & 0xff; - ip[2] = (addr >> 8) & 0xff; - ip[3] = addr & 0xff; - CDEBUG(D_NET, "\tip= %d.%d.%d.%d\n\thost= %s\n\tboard= %d\n\tep_id= %d\n\n", - ip[0], ip[1], ip[2], ip[3], - host->mxh_hostname, host->mxh_board, host->mxh_ep_id); - } -#endif - return; -} - -#define MXLND_BUFSIZE (PAGE_SIZE - 1) - -int -mxlnd_parse_hosts(char *filename) -{ - int ret = 0; - s32 size = 0; - s32 bufsize = MXLND_BUFSIZE; - s32 allocd = 0; - loff_t offset = 0; - struct file *filp = NULL; - char *buf = NULL; - s32 buf_off = 0; - char *sep = NULL; - char *line = NULL; - - if (filename == NULL) return -1; - - filp = filp_open(filename, O_RDONLY, 0); - if (IS_ERR(filp)) { - CERROR("filp_open() failed for %s\n", filename); - return -1; - } - - size = (s32) filp->f_dentry->d_inode->i_size; - if (size < MXLND_BUFSIZE) bufsize = size; - allocd = bufsize; - MXLND_ALLOC(buf, allocd + 1); - if (buf == NULL) { - CERROR("Cannot allocate buf\n"); - filp_close(filp, current->files); - return -1; - } - - while (offset < size) { - memset(buf, 0, bufsize + 1); - ret = kernel_read(filp, (unsigned long) offset, buf, (unsigned long) bufsize); - if (ret < 0) { - CDEBUG(D_NETERROR, "kernel_read() returned %d - closing %s\n", 
ret, filename); - filp_close(filp, current->files); - MXLND_FREE(buf, allocd + 1); - return -1; - } - - if (ret < bufsize) bufsize = ret; - buf_off = 0; - while (buf_off < bufsize) { - sep = strchr(buf + buf_off, '\n'); - if (sep != NULL) { - /* we have a line */ - line = buf + buf_off; - *sep = '\0'; - ret = mxlnd_parse_line(line); - if (ret != 0 && strlen(line) != 0) { - CDEBUG(D_NETERROR, "Failed to parse \"%s\". Ignoring this host.\n", line); - } - buf_off += strlen(line) + 1; - } else { - /* last line or we need to read more */ - line = buf + buf_off; - ret = mxlnd_parse_line(line); - if (ret != 0) { - bufsize -= strlen(line) + 1; - } - buf_off += strlen(line) + 1; - } - } - offset += bufsize; - bufsize = MXLND_BUFSIZE; - } - - MXLND_FREE(buf, allocd + 1); - filp_close(filp, current->files); - mxlnd_print_hosts(); - - return 0; -} - -/** - * mxlnd_init_mx - open the endpoint, set out ID, register the EAGER callback - * @ni - the network interface - * - * Returns 0 on success, -1 on failure - */ -int -mxlnd_init_mx(lnet_ni_t *ni) -{ - int ret = 0; - int found = 0; - mx_return_t mxret; - mx_endpoint_addr_t addr; - u32 board = *kmxlnd_tunables.kmx_board; - u32 ep_id = *kmxlnd_tunables.kmx_ep_id; - u64 nic_id = 0LL; - struct kmx_host *host = NULL; - - mxret = mx_init(); - if (mxret != MX_SUCCESS) { - CERROR("mx_init() failed with %s (%d)\n", mx_strerror(mxret), mxret); - return -1; - } - - ret = mxlnd_parse_hosts(*kmxlnd_tunables.kmx_hosts); - if (ret != 0) { - if (*kmxlnd_tunables.kmx_hosts != NULL) { - CERROR("mxlnd_parse_hosts(%s) failed\n", *kmxlnd_tunables.kmx_hosts); - } - mx_finalize(); - return -1; - } - - list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) { - if (strcmp(host->mxh_hostname, system_utsname.nodename) == 0) { - /* override the defaults and module parameters with - * the info from the hosts file */ - board = host->mxh_board; - ep_id = host->mxh_ep_id; - kmxlnd_data.kmx_localhost = host; - CDEBUG(D_NET, "my hostname is %s board %d 
ep_id %d\n", kmxlnd_data.kmx_localhost->mxh_hostname, kmxlnd_data.kmx_localhost->mxh_board, kmxlnd_data.kmx_localhost->mxh_ep_id); - found = 1; - break; - } - } - - if (found == 0) { - CERROR("no host entry found for localhost\n"); - mx_finalize(); - return -1; - } - - mxret = mx_open_endpoint(board, ep_id, MXLND_MSG_MAGIC, - NULL, 0, &kmxlnd_data.kmx_endpt); - if (mxret != MX_SUCCESS) { - CERROR("mx_open_endpoint() failed with %d\n", mxret); - mx_finalize(); - return -1; - } - - mx_get_endpoint_addr(kmxlnd_data.kmx_endpt, &addr); - mx_decompose_endpoint_addr(addr, &nic_id, &ep_id); - - LASSERT(host != NULL); - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), host->mxh_addr); - - CDEBUG(D_NET, "My NID is 0x%llx\n", ni->ni_nid); - - /* this will catch all unexpected receives. */ - mxret = mx_register_unexp_handler(kmxlnd_data.kmx_endpt, - (mx_unexp_handler_t) mxlnd_unexpected_recv, - NULL); - if (mxret != MX_SUCCESS) { - CERROR("mx_register_unexp_callback() failed with %s\n", - mx_strerror(mxret)); - mx_close_endpoint(kmxlnd_data.kmx_endpt); - mx_finalize(); - return -1; - } - mxret = mx_set_request_timeout(kmxlnd_data.kmx_endpt, NULL, MXLND_COMM_TIMEOUT/HZ*1000); - if (mxret != MX_SUCCESS) { - CERROR("mx_set_request_timeout() failed with %s\n", - mx_strerror(mxret)); - mx_close_endpoint(kmxlnd_data.kmx_endpt); - mx_finalize(); - return -1; - } - return 0; -} - - -/** - * mxlnd_thread_start - spawn a kernel thread with this function - * @fn - function pointer - * @arg - pointer to the parameter data - * - * Returns 0 on success and a negative value on failure - */ -int -mxlnd_thread_start(int (*fn)(void *arg), void *arg) -{ - int pid = 0; - int i = (int) ((long) arg); - - atomic_inc(&kmxlnd_data.kmx_nthreads); - init_completion(&kmxlnd_data.kmx_completions[i]); - - pid = kernel_thread (fn, arg, 0); - if (pid <= 0) { - CERROR("mx_thread_start() failed with %d\n", pid); - atomic_dec(&kmxlnd_data.kmx_nthreads); - } - return pid; -} - -/** - * mxlnd_thread_stop - 
decrement thread counter - * - * The thread returns 0 when it detects shutdown. - * We are simply decrementing the thread counter. - */ -void -mxlnd_thread_stop(long id) -{ - int i = (int) id; - atomic_dec (&kmxlnd_data.kmx_nthreads); - complete(&kmxlnd_data.kmx_completions[i]); -} - -/** - * mxlnd_shutdown - stop IO, clean up state - * @ni - LNET interface handle - * - * No calls to the LND should be made after calling this function. - */ -void -mxlnd_shutdown (lnet_ni_t *ni) -{ - int i = 0; - - LASSERT (ni == kmxlnd_data.kmx_ni); - LASSERT (ni->ni_data == &kmxlnd_data); - CDEBUG(D_NET, "in shutdown()\n"); - - CDEBUG(D_MALLOC, "before MXLND cleanup: libcfs_kmemory %d " - "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), - kmxlnd_data.kmx_mem_used); - - switch (kmxlnd_data.kmx_init) { - - case MXLND_INIT_ALL: - - CDEBUG(D_NET, "setting shutdown = 1\n"); - /* set shutdown and wakeup request_waitds */ - kmxlnd_data.kmx_shutdown = 1; - mb(); - mx_wakeup(kmxlnd_data.kmx_endpt); - up(&kmxlnd_data.kmx_tx_queue_sem); - mxlnd_sleep(2 * HZ); - - /* fall through */ - - case MXLND_INIT_THREADS: - - CDEBUG(D_NET, "waiting on threads\n"); - /* wait for threads to complete */ - for (i = 0; i < MXLND_NCOMPLETIONS; i++) { - wait_for_completion(&kmxlnd_data.kmx_completions[i]); - } - LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0); - - CDEBUG(D_NET, "freeing completions\n"); - MXLND_FREE(kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - - /* fall through */ - - case MXLND_INIT_MX: - - CDEBUG(D_NET, "stopping mx\n"); - - /* wakeup waiters if they missed the above. - * close endpoint to stop all traffic. - * this will cancel and cleanup all requests, etc. 
*/ - - mx_wakeup(kmxlnd_data.kmx_endpt); - mx_close_endpoint(kmxlnd_data.kmx_endpt); - mx_finalize(); - - CDEBUG(D_NET, "mxlnd_free_hosts();\n"); - mxlnd_free_hosts(); - - /* fall through */ - - case MXLND_INIT_RXS: - - CDEBUG(D_NET, "freeing rxs\n"); - - /* free all rxs and associated pages */ - mxlnd_free_rxs(); - - /* fall through */ - - case MXLND_INIT_TXS: - - CDEBUG(D_NET, "freeing txs\n"); - - /* free all txs and associated pages */ - mxlnd_free_txs(); - - /* fall through */ - - case MXLND_INIT_DATA: - - CDEBUG(D_NET, "freeing peers\n"); - - /* free peer list */ - mxlnd_free_peers(); - - /* fall through */ - - case MXLND_INIT_NOTHING: - break; - } - CDEBUG(D_NET, "shutdown complete\n"); - - CDEBUG(D_MALLOC, "after MXLND cleanup: libcfs_kmemory %d " - "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), - kmxlnd_data.kmx_mem_used); - - kmxlnd_data.kmx_init = MXLND_INIT_NOTHING; - PORTAL_MODULE_UNUSE; - return; -} - -/** - * mxlnd_startup - initialize state, open an endpoint, start IO - * @ni - LNET interface handle - * - * Initialize state, open an endpoint, start monitoring threads. - * Should only be called once. 
- */ -int -mxlnd_startup (lnet_ni_t *ni) -{ - int i = 0; - int ret = 0; - struct timeval tv; - - LASSERT (ni->ni_lnd == &the_kmxlnd); - - if (kmxlnd_data.kmx_init != MXLND_INIT_NOTHING) { - CERROR("Only 1 instance supported\n"); - return -EPERM; - } - CDEBUG(D_MALLOC, "before MXLND startup: libcfs_kmemory %d " - "kmx_mem_used %ld\n", atomic_read (&libcfs_kmemory), - kmxlnd_data.kmx_mem_used); - - /* reserve 1/2 of tx for connect request messages */ - ni->ni_maxtxcredits = *kmxlnd_tunables.kmx_ntx / 2; - ni->ni_peertxcredits = *kmxlnd_tunables.kmx_credits; - - PORTAL_MODULE_USE; - memset (&kmxlnd_data, 0, sizeof (kmxlnd_data)); - - kmxlnd_data.kmx_ni = ni; - ni->ni_data = &kmxlnd_data; - - do_gettimeofday(&tv); - kmxlnd_data.kmx_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - CDEBUG(D_NET, "my incarnation is %lld\n", kmxlnd_data.kmx_incarnation); - - spin_lock_init (&kmxlnd_data.kmx_global_lock); - - INIT_LIST_HEAD (&kmxlnd_data.kmx_conn_req); - spin_lock_init (&kmxlnd_data.kmx_conn_lock); - sema_init(&kmxlnd_data.kmx_conn_sem, 0); - - INIT_LIST_HEAD (&kmxlnd_data.kmx_hosts); - spin_lock_init (&kmxlnd_data.kmx_hosts_lock); - - for (i = 0; i < MXLND_HASH_SIZE; i++) { - INIT_LIST_HEAD (&kmxlnd_data.kmx_peers[i]); - } - rwlock_init (&kmxlnd_data.kmx_peers_lock); - - INIT_LIST_HEAD (&kmxlnd_data.kmx_txs); - INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_idle); - spin_lock_init (&kmxlnd_data.kmx_tx_idle_lock); - kmxlnd_data.kmx_tx_next_cookie = 1; - INIT_LIST_HEAD (&kmxlnd_data.kmx_tx_queue); - spin_lock_init (&kmxlnd_data.kmx_tx_queue_lock); - sema_init(&kmxlnd_data.kmx_tx_queue_sem, 0); - - INIT_LIST_HEAD (&kmxlnd_data.kmx_rxs); - spin_lock_init (&kmxlnd_data.kmx_rxs_lock); - INIT_LIST_HEAD (&kmxlnd_data.kmx_rx_idle); - spin_lock_init (&kmxlnd_data.kmx_rx_idle_lock); - - kmxlnd_data.kmx_init = MXLND_INIT_DATA; - /*****************************************************/ - - ret = mxlnd_init_txs(); - if (ret != 0) { - CERROR("Can't alloc tx descs: %d\n", ret); - goto 
failed; - } - kmxlnd_data.kmx_init = MXLND_INIT_TXS; - /*****************************************************/ - - ret = mxlnd_init_rxs(); - if (ret != 0) { - CERROR("Can't alloc rx descs: %d\n", ret); - goto failed; - } - kmxlnd_data.kmx_init = MXLND_INIT_RXS; - /*****************************************************/ - - ret = mxlnd_init_mx(ni); - if (ret != 0) { - CERROR("Can't init mx\n"); - goto failed; - } - - kmxlnd_data.kmx_init = MXLND_INIT_MX; - /*****************************************************/ - - /* start threads */ - - MXLND_ALLOC (kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - if (kmxlnd_data.kmx_completions == NULL) { - CERROR("failed to alloc kmxlnd_data.kmx_completions"); - goto failed; - } - memset(kmxlnd_data.kmx_completions, 0, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - - { - int i = 0; - if (MXLND_N_SCHED > *kmxlnd_tunables.kmx_n_waitd) { - *kmxlnd_tunables.kmx_n_waitd = MXLND_N_SCHED; - } - CDEBUG(D_NET, "using %d %s in mx_wait_any()\n", - *kmxlnd_tunables.kmx_n_waitd, - *kmxlnd_tunables.kmx_n_waitd == 1 ? 
"thread" : "threads"); - - for (i = 0; i < *kmxlnd_tunables.kmx_n_waitd; i++) { - ret = mxlnd_thread_start(mxlnd_request_waitd, (void*)((long)i)); - if (ret < 0) { - CERROR("Starting mxlnd_request_waitd[%d] failed with %d\n", i, ret); - for (--i; i >= 0; i--) { - wait_for_completion(&kmxlnd_data.kmx_completions[i]); - } - LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0); - MXLND_FREE(kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - - goto failed; - } - } - ret = mxlnd_thread_start(mxlnd_tx_queued, (void*)((long)i++)); - if (ret < 0) { - CERROR("Starting mxlnd_tx_queued failed with %d\n", ret); - for (--i; i >= 0; i--) { - wait_for_completion(&kmxlnd_data.kmx_completions[i]); - } - LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0); - MXLND_FREE(kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - goto failed; - } - ret = mxlnd_thread_start(mxlnd_timeoutd, (void*)((long)i++)); - if (ret < 0) { - CERROR("Starting mxlnd_timeoutd failed with %d\n", ret); - for (--i; i >= 0; i--) { - wait_for_completion(&kmxlnd_data.kmx_completions[i]); - } - LASSERT(atomic_read(&kmxlnd_data.kmx_nthreads) == 0); - MXLND_FREE(kmxlnd_data.kmx_completions, - MXLND_NCOMPLETIONS * sizeof(struct completion)); - goto failed; - } - } - - kmxlnd_data.kmx_init = MXLND_INIT_THREADS; - /*****************************************************/ - - kmxlnd_data.kmx_init = MXLND_INIT_ALL; - CDEBUG(D_MALLOC, "startup complete (kmx_mem_used %ld)\n", kmxlnd_data.kmx_mem_used); - - return 0; -failed: - CERROR("mxlnd_startup failed\n"); - mxlnd_shutdown (ni); - return (-ENETDOWN); -} - -static int mxlnd_init(void) -{ - lnet_register_lnd(&the_kmxlnd); - return 0; -} - -static void mxlnd_exit(void) -{ - lnet_unregister_lnd(&the_kmxlnd); - return; -} - -module_init(mxlnd_init); -module_exit(mxlnd_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Myricom, Inc. 
- help@myri.com"); -MODULE_DESCRIPTION("Kernel MyrinetExpress LND"); -MODULE_VERSION("0.5.0"); diff --git a/lnet/klnds/mxlnd/mxlnd.h b/lnet/klnds/mxlnd/mxlnd.h deleted file mode 100644 index 28e58caec2aee11a49b5b6fc308a11e8573e9782..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd.h +++ /dev/null @@ -1,415 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Scott Atchley <atchley at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -#define EXPORT_SYMTAB -#endif - -#include <linux/config.h> -#include <linux/module.h> /* module */ -#include <linux/kernel.h> /* module */ -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> /* module */ -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <linux/random.h> -#include <linux/utsname.h> - -#include <net/sock.h> -#include <linux/in.h> - -#include <linux/netdevice.h> /* these are needed for ARP */ -#include <linux/if_arp.h> -#include <net/arp.h> -#include <linux/inetdevice.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include "libcfs/kp30.h" -#include "lnet/lnet.h" -#include "lnet/lib-lnet.h" - -#define MX_KERNEL 1 -#include "mx_extensions.h" -#include "myriexpress.h" - -#if LNET_MAX_IOV > MX_MAX_SEGMENTS - #error LNET_MAX_IOV is greater then MX_MAX_SEGMENTS -#endif - -/* Using MX's 64 match bits - * We are using the match bits to specify message type and the cookie. The - * highest four bits (60-63) are reserved for message type. Below we specify - * the types. MXLND_MASK_ICON_REQ and MXLND_MASK_ICON_ACK are used for - * mx_iconnect(). We reserve the remaining combinations for future use. The - * next 8 bits (52-59) are reserved for returning a status code for failed - * GET_DATA (payload) messages. The last 52 bits are used for cookies. That - * should allow unique cookies for 4 KB messages at 10 Gbps line rate without - * rollover for about 8 years. That should be enough. 
*/ - -/* constants */ -#define MXLND_MASK_ICON_REQ (0xBLL << 60) /* it is a mx_iconnect() completion */ -#define MXLND_MASK_CONN_REQ (0xCLL << 60) /* CONN_REQ msg */ -#define MXLND_MASK_ICON_ACK (0x9LL << 60) /* it is a mx_iconnect() completion */ -#define MXLND_MASK_CONN_ACK (0xALL << 60) /* CONN_ACK msg*/ -#define MXLND_MASK_EAGER (0xELL << 60) /* EAGER msg */ -#define MXLND_MASK_NOOP (0x1LL << 60) /* NOOP msg */ -#define MXLND_MASK_PUT_REQ (0x2LL << 60) /* PUT_REQ msg */ -#define MXLND_MASK_PUT_ACK (0x3LL << 60) /* PUT_ACK msg */ -#define MXLND_MASK_PUT_DATA (0x4LL << 60) /* PUT_DATA msg */ -#define MXLND_MASK_GET_REQ (0x5LL << 60) /* GET_REQ msg */ -#define MXLND_MASK_GET_DATA (0x6LL << 60) /* GET_DATA msg */ -//#define MXLND_MASK_NAK (0x7LL << 60) /* NAK msg */ - -#define MXLND_MAX_COOKIE ((1LL << 52) - 1) /* when to roll-over the cookie value */ -#define MXLND_NCOMPLETIONS (MXLND_N_SCHED + 2) /* max threads for completion array */ - -/* defaults for configurable parameters */ -#define MXLND_N_SCHED 1 /* # schedulers (mx_wait_any() threads) */ -#define MXLND_MX_BOARD 0 /* Use the first MX NIC if more than 1 avail */ -#define MXLND_MX_EP_ID 3 /* MX endpoint ID */ -#define MXLND_COMM_TIMEOUT (20 * HZ) /* timeout for send/recv (jiffies) */ -#define MXLND_WAIT_TIMEOUT HZ /* timeout for wait (jiffies) */ -#define MXLND_POLLING 0 /* poll iterations before blocking */ -#define MXLND_MAX_PEERS 1024 /* number of nodes talking to me */ -#define MXLND_EAGER_NUM MXLND_MAX_PEERS /* number of pre-posted receives */ -#define MXLND_EAGER_SIZE PAGE_SIZE /* pre-posted eager message size */ -#define MXLND_MSG_QUEUE_DEPTH 8 /* msg queue depth */ -#define MXLND_CREDIT_HIGHWATER (MXLND_MSG_QUEUE_DEPTH - 2) - /* when to send a noop to return credits */ -#define MXLND_NTX 256 /* # of kmx_tx - total sends in flight - 1/2 are reserved for connect messages */ - -#define MXLND_HASH_BITS 6 /* the number of bits to hash over */ -#define MXLND_HASH_SIZE (1<<MXLND_HASH_BITS) - /* number of 
peer lists for lookup. - we hash over the last N bits of - the IP address converted to an int. */ -#define MXLND_HASH_MASK (MXLND_HASH_SIZE - 1) - /* ensure we use only the last N bits */ - -/* debugging features */ -#define MXLND_CKSUM 0 /* checksum kmx_msg_t */ -#define MXLND_DEBUG 0 /* turn on printk()s */ - -extern inline void mxlnd_noop(char *s, ...); -#if MXLND_DEBUG - #define MXLND_PRINT printk -#else - #define MXLND_PRINT mxlnd_noop -#endif - -/* provide wrappers around LIBCFS_ALLOC/FREE to keep MXLND specific - * memory usage stats that include pages */ - -#define MXLND_ALLOC(x, size) \ - do { \ - spin_lock(&kmxlnd_data.kmx_global_lock); \ - kmxlnd_data.kmx_mem_used += size; \ - spin_unlock(&kmxlnd_data.kmx_global_lock); \ - LIBCFS_ALLOC(x, size); \ - if (x == NULL) { \ - spin_lock(&kmxlnd_data.kmx_global_lock); \ - kmxlnd_data.kmx_mem_used -= size; \ - spin_unlock(&kmxlnd_data.kmx_global_lock); \ - } \ - } while (0) - -#define MXLND_FREE(x, size) \ - do { \ - spin_lock(&kmxlnd_data.kmx_global_lock); \ - kmxlnd_data.kmx_mem_used -= size; \ - spin_unlock(&kmxlnd_data.kmx_global_lock); \ - LIBCFS_FREE(x, size); \ - } while (0) - - -typedef struct kmx_tunables { - int *kmx_n_waitd; /* # completion threads */ - int *kmx_max_peers; /* max # of potential peers */ - int *kmx_cksum; /* checksum small msgs? */ - int *kmx_ntx; /* total # of tx (1/2 for LNET 1/2 for CONN_REQ */ - int *kmx_credits; /* concurrent sends to 1 peer */ - int *kmx_board; /* MX board (NIC) number */ - int *kmx_ep_id; /* MX endpoint number */ - int *kmx_polling; /* if 0, block. 
if > 0, poll this many - iterations before blocking */ - char **kmx_hosts; /* Location of hosts file, if used */ -} kmx_tunables_t; - -/* structure to hold IP-to-hostname resolution data */ -struct kmx_host { - struct kmx_peer *mxh_peer; /* pointer to matching peer */ - u32 mxh_addr; /* IP address as int */ - char *mxh_hostname; /* peer's hostname */ - u32 mxh_board; /* peer's board rank */ - u32 mxh_ep_id; /* peer's MX endpoint ID */ - struct list_head mxh_list; /* position on kmx_hosts */ - spinlock_t mxh_lock; /* lock */ -}; - -/* global interface state */ -typedef struct kmx_data -{ - int kmx_init; /* initialization state */ - int kmx_shutdown; /* shutting down? */ - atomic_t kmx_nthreads; /* number of threads */ - struct completion *kmx_completions; /* array of completion structs */ - lnet_ni_t *kmx_ni; /* the LND instance */ - u64 kmx_incarnation; /* my incarnation value - unused */ - long kmx_mem_used; /* memory used */ - struct kmx_host *kmx_localhost; /* pointer to my kmx_host info */ - mx_endpoint_t kmx_endpt; /* the MX endpoint */ - - spinlock_t kmx_global_lock; /* global lock */ - - struct list_head kmx_conn_req; /* list of connection requests */ - spinlock_t kmx_conn_lock; /* connection list lock */ - struct semaphore kmx_conn_sem; /* semaphore for connection request list */ - - struct list_head kmx_hosts; /* host lookup info */ - spinlock_t kmx_hosts_lock; /* hosts list lock */ - - struct list_head kmx_peers[MXLND_HASH_SIZE]; - /* list of all known peers */ - rwlock_t kmx_peers_lock; /* peer list rw lock */ - atomic_t kmx_npeers; /* number of peers */ - - struct list_head kmx_txs; /* all tx descriptors */ - struct list_head kmx_tx_idle; /* list of idle tx */ - spinlock_t kmx_tx_idle_lock; /* lock for idle tx list */ - s32 kmx_tx_used; /* txs in use */ - u64 kmx_tx_next_cookie; /* unique id for tx */ - struct list_head kmx_tx_queue; /* generic send queue */ - spinlock_t kmx_tx_queue_lock; /* lock for generic sends */ - struct semaphore 
kmx_tx_queue_sem; /* semaphore for tx queue */ - - struct list_head kmx_rxs; /* all rx descriptors */ - spinlock_t kmx_rxs_lock; /* lock for rxs list */ - struct list_head kmx_rx_idle; /* list of idle tx */ - spinlock_t kmx_rx_idle_lock; /* lock for idle rx list */ -} kmx_data_t; - -#define MXLND_INIT_NOTHING 0 /* in the beginning, there was nothing... */ -#define MXLND_INIT_DATA 1 /* main data structures created */ -#define MXLND_INIT_TXS 2 /* tx descriptors created */ -#define MXLND_INIT_RXS 3 /* initial rx descriptors created */ -#define MXLND_INIT_MX 4 /* initiate MX library, open endpoint, get NIC id */ -#define MXLND_INIT_THREADS 5 /* waitd, timeoutd, tx_queued threads */ -#define MXLND_INIT_ALL 6 /* startup completed */ - -#include "mxlnd_wire.h" - -enum kmx_req_type { - MXLND_REQ_TX = 0, - MXLND_REQ_RX = 1, -}; - -/* The life cycle of a request */ -enum kmx_req_state { - MXLND_CTX_INIT = 0, /* just created */ - MXLND_CTX_IDLE = 1, /* available for use */ - MXLND_CTX_PREP = 2, /* getting ready for send/recv */ - MXLND_CTX_PENDING = 3, /* mx_isend() or mx_irecv() called */ - MXLND_CTX_COMPLETED = 4, /* cleaning up after completion or timeout */ - MXLND_CTX_CANCELED = 5, /* timed out but still in ctx list */ -}; - -/* Context Structure - generic tx/rx descriptor - * It represents the context (or state) of each send or receive request. - * In other LNDs, they have separate TX and RX descriptors and this replaces both. - * - * We will keep the these on the global kmx_rxs and kmx_txs lists for cleanup - * during shutdown(). We will move them between the rx/tx idle lists and the - * pending list which is monitored by mxlnd_timeoutd(). 
- */ -struct kmx_ctx { - enum kmx_req_type mxc_type; /* TX or RX */ - u64 mxc_incarnation; /* store the peer's incarnation here - to verify before changing flow - control credits after completion */ - unsigned long mxc_deadline; /* request time out in absolute jiffies */ - enum kmx_req_state mxc_state; /* what is the state of the request? */ - struct list_head mxc_global_list; /* place on kmx_rxs or kmx_txs */ - struct list_head mxc_list; /* place on rx/tx idle list, tx q, peer tx */ - struct list_head mxc_rx_list; /* place on mxp_rx_posted list */ - spinlock_t mxc_lock; /* lock */ - - lnet_nid_t mxc_nid; /* dst's NID if peer is not known */ - struct kmx_peer *mxc_peer; /* owning peer */ - struct kmx_conn *mxc_conn; /* owning conn */ - struct kmx_msg *mxc_msg; /* msg hdr mapped to mxc_page */ - struct page *mxc_page; /* buffer for eager msgs */ - lnet_msg_t *mxc_lntmsg[2]; /* lnet msgs to finalize */ - - u8 mxc_msg_type; /* what type of message is this? */ - u64 mxc_cookie; /* completion cookie */ - u64 mxc_match; /* MX match info */ - mx_ksegment_t mxc_seg; /* local MX ksegment for non-DATA */ - mx_ksegment_t *mxc_seg_list; /* MX ksegment array for DATA */ - int mxc_nseg; /* number of segments */ - unsigned long mxc_pin_type; /* MX_PIN_KERNEL or MX_PIN_PHYSICAL */ - u32 mxc_nob; /* number of bytes sent/received */ - mx_request_t mxc_mxreq; /* MX request */ - mx_status_t mxc_status; /* MX status */ - s64 mxc_get; /* # of times returned from idle list */ - s64 mxc_put; /* # of times returned from idle list */ -}; - -#define MXLND_CONN_DISCONNECT -2 /* conn is being destroyed - do not add txs */ -#define MXLND_CONN_FAIL -1 /* connect failed (bad handshake, unavail, etc.) */ -#define MXLND_CONN_INIT 0 /* in the beginning, there was nothing... 
*/ -#define MXLND_CONN_REQ 1 /* a connection request message is needed */ -#define MXLND_CONN_ACK 2 /* a connection ack is needed */ -#define MXLND_CONN_WAIT 3 /* waiting for req or ack to complete */ -#define MXLND_CONN_READY 4 /* ready to send */ - -/* connection state - queues for queued and pending msgs */ -struct kmx_conn -{ - u64 mxk_incarnation; /* connections's incarnation value */ - atomic_t mxk_refcount; /* reference counting */ - - struct kmx_peer *mxk_peer; /* owning peer */ - mx_endpoint_addr_t mxk_epa; /* peer's endpoint address */ - - struct list_head mxk_list; /* for placing on mxp_conns */ - spinlock_t mxk_lock; /* lock */ - unsigned long mxk_timeout; /* expiration of oldest pending tx/rx */ - unsigned long mxk_last_tx; /* when last tx completed with success */ - unsigned long mxk_last_rx; /* when last rx completed */ - - int mxk_credits; /* # of my credits for sending to peer */ - int mxk_outstanding; /* # of credits to return */ - - int mxk_status; /* can we send messages? 
MXLND_CONN_* */ - struct list_head mxk_tx_credit_queue; /* send queue for peer */ - struct list_head mxk_tx_free_queue; /* send queue for peer */ - int mxk_ntx_msgs; /* # of msgs on tx queues */ - int mxk_ntx_data ; /* # of DATA on tx queues */ - int mxk_ntx_posted; /* # of tx msgs in flight */ - int mxk_data_posted; /* # of tx data payloads in flight */ - - struct list_head mxk_pending; /* in flight rxs and txs */ -}; - -/* peer state */ -struct kmx_peer -{ - lnet_nid_t mxp_nid; /* peer's LNET NID */ - u64 mxp_incarnation; /* peer's incarnation value */ - atomic_t mxp_refcount; /* reference counts */ - - struct kmx_host *mxp_host; /* peer lookup info */ - u64 mxp_nic_id; /* remote's MX nic_id for mx_connect() */ - - struct list_head mxp_peers; /* for placing on kmx_peers */ - spinlock_t mxp_lock; /* lock */ - - struct list_head mxp_conns; /* list of connections */ - struct kmx_conn *mxp_conn; /* current connection */ - - unsigned long mxp_reconnect_time; /* when to retry connect */ - int mxp_incompatible; /* incorrect conn_req values */ -}; - -extern kmx_data_t kmxlnd_data; -extern kmx_tunables_t kmxlnd_tunables; - -/* required for the LNET API */ -int mxlnd_startup(lnet_ni_t *ni); -void mxlnd_shutdown(lnet_ni_t *ni); -int mxlnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int mxlnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int mxlnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); - -/* in mxlnd.c */ -extern void mxlnd_thread_stop(long id); -extern int mxlnd_ctx_alloc(struct kmx_ctx **ctxp, enum kmx_req_type type); -extern void mxlnd_ctx_free(struct kmx_ctx *ctx); -extern void mxlnd_ctx_init(struct kmx_ctx *ctx); -extern lnet_nid_t mxlnd_nic_id2nid(lnet_ni_t *ni, u64 nic_id); -extern u64 mxlnd_nid2nic_id(lnet_nid_t nid); - -/* in mxlnd_cb.c */ -void mxlnd_eager_recv(void *context, uint64_t match_value, 
uint32_t length); -extern mx_unexp_handler_action_t mxlnd_unexpected_recv(void *context, - mx_endpoint_addr_t source, uint64_t match_value, uint32_t length, - void *data_if_available); -extern void mxlnd_peer_free(struct kmx_peer *peer); -extern void mxlnd_conn_free(struct kmx_conn *conn); -extern void mxlnd_sleep(unsigned long timeout); -extern int mxlnd_tx_queued(void *arg); -extern void mxlnd_handle_rx_completion(struct kmx_ctx *rx); -extern int mxlnd_check_sends(struct kmx_peer *peer); -extern int mxlnd_tx_peer_queued(void *arg); -extern int mxlnd_request_waitd(void *arg); -extern int mxlnd_unex_recvd(void *arg); -extern int mxlnd_timeoutd(void *arg); -extern int mxlnd_connd(void *arg); - -#define mxlnd_peer_addref(peer) \ -do { \ - LASSERT(atomic_read(&(peer)->mxp_refcount) > 0); \ - atomic_inc(&(peer)->mxp_refcount); \ -} while (0) - - -#define mxlnd_peer_decref(peer) \ -do { \ - LASSERT(atomic_read(&(peer)->mxp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->mxp_refcount)) \ - mxlnd_peer_free(peer); \ -} while (0) - -#define mxlnd_conn_addref(conn) \ -do { \ - LASSERT(atomic_read(&(conn)->mxk_refcount) > 0); \ - atomic_inc(&(conn)->mxk_refcount); \ -} while (0) - - -#define mxlnd_conn_decref(conn) \ -do { \ - LASSERT(atomic_read(&(conn)->mxk_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->mxk_refcount)) \ - mxlnd_conn_free(conn); \ -} while (0) diff --git a/lnet/klnds/mxlnd/mxlnd_cb.c b/lnet/klnds/mxlnd/mxlnd_cb.c deleted file mode 100644 index 31e0a486d3dd397f50453af27836f52a0306b088..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd_cb.c +++ /dev/null @@ -1,3440 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Myricom, Inc. <help at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "mxlnd.h" - -inline void mxlnd_noop(char *s, ...) -{ - return; -} - -char * -mxlnd_ctxstate_to_str(int mxc_state) -{ - switch (mxc_state) { - case MXLND_CTX_INIT: - return "MXLND_CTX_INIT"; - case MXLND_CTX_IDLE: - return "MXLND_CTX_IDLE"; - case MXLND_CTX_PREP: - return "MXLND_CTX_PREP"; - case MXLND_CTX_PENDING: - return "MXLND_CTX_PENDING"; - case MXLND_CTX_COMPLETED: - return "MXLND_CTX_COMPLETED"; - case MXLND_CTX_CANCELED: - return "MXLND_CTX_CANCELED"; - default: - return "*unknown*"; - } -} - -char * -mxlnd_connstatus_to_str(int mxk_status) -{ - switch (mxk_status) { - case MXLND_CONN_READY: - return "MXLND_CONN_READY"; - case MXLND_CONN_INIT: - return "MXLND_CONN_INIT"; - case MXLND_CONN_REQ: - return "MXLND_CONN_REQ"; - case MXLND_CONN_ACK: - return "MXLND_CONN_ACK"; - case MXLND_CONN_WAIT: - return "MXLND_CONN_WAIT"; - case MXLND_CONN_DISCONNECT: - return "MXLND_CONN_DISCONNECT"; - case MXLND_CONN_FAIL: - return "MXLND_CONN_FAIL"; - default: - return "unknown"; - } -} - -char * -mxlnd_msgtype_to_str(int type) { - switch (type) { - case MXLND_MSG_EAGER: - return "MXLND_MSG_EAGER"; - case MXLND_MSG_CONN_REQ: - return "MXLND_MSG_CONN_REQ"; - case MXLND_MSG_CONN_ACK: - return "MXLND_MSG_CONN_ACK"; - case MXLND_MSG_NOOP: - return "MXLND_MSG_NOOP"; - case MXLND_MSG_PUT_REQ: - return "MXLND_MSG_PUT_REQ"; - case 
MXLND_MSG_PUT_ACK: - return "MXLND_MSG_PUT_ACK"; - case MXLND_MSG_PUT_DATA: - return "MXLND_MSG_PUT_DATA"; - case MXLND_MSG_GET_REQ: - return "MXLND_MSG_GET_REQ"; - case MXLND_MSG_GET_DATA: - return "MXLND_MSG_GET_DATA"; - default: - return "unknown"; - } -} - -char * -mxlnd_lnetmsg_to_str(int type) -{ - switch (type) { - case LNET_MSG_ACK: - return "LNET_MSG_ACK"; - case LNET_MSG_PUT: - return "LNET_MSG_PUT"; - case LNET_MSG_GET: - return "LNET_MSG_GET"; - case LNET_MSG_REPLY: - return "LNET_MSG_REPLY"; - case LNET_MSG_HELLO: - return "LNET_MSG_HELLO"; - default: - return "*unknown*"; - } -} - -static inline u64 -//mxlnd_create_match(u8 msg_type, u8 error, u64 cookie) -mxlnd_create_match(struct kmx_ctx *ctx, u8 error) -{ - u64 type = (u64) ctx->mxc_msg_type; - u64 err = (u64) error; - u64 match = 0LL; - - LASSERT(ctx->mxc_msg_type != 0); - LASSERT(ctx->mxc_cookie >> 52 == 0); - match = (type << 60) | (err << 52) | ctx->mxc_cookie; - return match; -} - -static inline void -mxlnd_parse_match(u64 match, u8 *msg_type, u8 *error, u64 *cookie) -{ - *msg_type = (u8) (match >> 60); - *error = (u8) ((match >> 52) & 0xFF); - *cookie = match & 0xFFFFFFFFFFFFFLL; - LASSERT(match == (MXLND_MASK_ICON_REQ & 0xF000000000000000LL) || - match == (MXLND_MASK_ICON_ACK & 0xF000000000000000LL) || - *msg_type == MXLND_MSG_EAGER || - *msg_type == MXLND_MSG_CONN_REQ || - *msg_type == MXLND_MSG_CONN_ACK || - *msg_type == MXLND_MSG_NOOP || - *msg_type == MXLND_MSG_PUT_REQ || - *msg_type == MXLND_MSG_PUT_ACK || - *msg_type == MXLND_MSG_PUT_DATA || - *msg_type == MXLND_MSG_GET_REQ || - *msg_type == MXLND_MSG_GET_DATA); - return; -} - -struct kmx_ctx * -mxlnd_get_idle_rx(void) -{ - struct list_head *tmp = NULL; - struct kmx_ctx *rx = NULL; - - spin_lock(&kmxlnd_data.kmx_rx_idle_lock); - - if (list_empty (&kmxlnd_data.kmx_rx_idle)) { - spin_unlock(&kmxlnd_data.kmx_rx_idle_lock); - return NULL; - } - - tmp = &kmxlnd_data.kmx_rx_idle; - rx = list_entry (tmp->next, struct kmx_ctx, mxc_list); - 
list_del_init(&rx->mxc_list); - spin_unlock(&kmxlnd_data.kmx_rx_idle_lock); - -#if MXLND_DEBUG - if (rx->mxc_get != rx->mxc_put) { - CDEBUG(D_NETERROR, "*** RX get (%lld) != put (%lld) ***\n", rx->mxc_get, rx->mxc_put); - CDEBUG(D_NETERROR, "*** incarnation= %lld ***\n", rx->mxc_incarnation); - CDEBUG(D_NETERROR, "*** deadline= %ld ***\n", rx->mxc_deadline); - CDEBUG(D_NETERROR, "*** state= %s ***\n", mxlnd_ctxstate_to_str(rx->mxc_state)); - CDEBUG(D_NETERROR, "*** listed?= %d ***\n", !list_empty(&rx->mxc_list)); - CDEBUG(D_NETERROR, "*** nid= 0x%llx ***\n", rx->mxc_nid); - CDEBUG(D_NETERROR, "*** peer= 0x%p ***\n", rx->mxc_peer); - CDEBUG(D_NETERROR, "*** msg_type= %s ***\n", mxlnd_msgtype_to_str(rx->mxc_msg_type)); - CDEBUG(D_NETERROR, "*** cookie= 0x%llx ***\n", rx->mxc_cookie); - CDEBUG(D_NETERROR, "*** nob= %d ***\n", rx->mxc_nob); - } -#endif - LASSERT (rx->mxc_get == rx->mxc_put); - - rx->mxc_get++; - - LASSERT (rx->mxc_state == MXLND_CTX_IDLE); - rx->mxc_state = MXLND_CTX_PREP; - - return rx; -} - -int -mxlnd_put_idle_rx(struct kmx_ctx *rx) -{ - if (rx == NULL) { - CDEBUG(D_NETERROR, "called with NULL pointer\n"); - return -EINVAL; - } else if (rx->mxc_type != MXLND_REQ_RX) { - CDEBUG(D_NETERROR, "called with tx\n"); - return -EINVAL; - } - LASSERT(rx->mxc_get == rx->mxc_put + 1); - mxlnd_ctx_init(rx); - rx->mxc_put++; - spin_lock(&kmxlnd_data.kmx_rx_idle_lock); - list_add_tail(&rx->mxc_list, &kmxlnd_data.kmx_rx_idle); - spin_unlock(&kmxlnd_data.kmx_rx_idle_lock); - return 0; -} - -int -mxlnd_reduce_idle_rxs(__u32 count) -{ - __u32 i = 0; - struct kmx_ctx *rx = NULL; - - spin_lock(&kmxlnd_data.kmx_rxs_lock); - for (i = 0; i < count; i++) { - rx = mxlnd_get_idle_rx(); - if (rx != NULL) { - struct list_head *tmp = &rx->mxc_global_list; - list_del_init(tmp); - mxlnd_ctx_free(rx); - } else { - CDEBUG(D_NETERROR, "only reduced %d out of %d rxs\n", i, count); - break; - } - } - spin_unlock(&kmxlnd_data.kmx_rxs_lock); - return 0; -} - -struct kmx_ctx * 
-mxlnd_get_idle_tx(void) -{ - struct list_head *tmp = NULL; - struct kmx_ctx *tx = NULL; - - spin_lock(&kmxlnd_data.kmx_tx_idle_lock); - - if (list_empty (&kmxlnd_data.kmx_tx_idle)) { - CDEBUG(D_NETERROR, "%d txs in use\n", kmxlnd_data.kmx_tx_used); - spin_unlock(&kmxlnd_data.kmx_tx_idle_lock); - return NULL; - } - - tmp = &kmxlnd_data.kmx_tx_idle; - tx = list_entry (tmp->next, struct kmx_ctx, mxc_list); - list_del_init(&tx->mxc_list); - - /* Allocate a new completion cookie. It might not be needed, - * but we've got a lock right now and we're unlikely to - * wrap... */ - tx->mxc_cookie = kmxlnd_data.kmx_tx_next_cookie++; - if (kmxlnd_data.kmx_tx_next_cookie > MXLND_MAX_COOKIE) { - tx->mxc_cookie = 1; - } - kmxlnd_data.kmx_tx_used++; - spin_unlock(&kmxlnd_data.kmx_tx_idle_lock); - - LASSERT (tx->mxc_get == tx->mxc_put); - - tx->mxc_get++; - - LASSERT (tx->mxc_state == MXLND_CTX_IDLE); - LASSERT (tx->mxc_lntmsg[0] == NULL); - LASSERT (tx->mxc_lntmsg[1] == NULL); - - tx->mxc_state = MXLND_CTX_PREP; - - return tx; -} - -int -mxlnd_put_idle_tx(struct kmx_ctx *tx) -{ - int failed = (tx->mxc_status.code != MX_STATUS_SUCCESS && tx->mxc_status.code != MX_STATUS_TRUNCATED); - int result = failed ? 
-EIO : 0; - lnet_msg_t *lntmsg[2]; - - if (tx == NULL) { - CDEBUG(D_NETERROR, "called with NULL pointer\n"); - return -EINVAL; - } else if (tx->mxc_type != MXLND_REQ_TX) { - CDEBUG(D_NETERROR, "called with rx\n"); - return -EINVAL; - } - - lntmsg[0] = tx->mxc_lntmsg[0]; - lntmsg[1] = tx->mxc_lntmsg[1]; - - LASSERT(tx->mxc_get == tx->mxc_put + 1); - mxlnd_ctx_init(tx); - tx->mxc_put++; - spin_lock(&kmxlnd_data.kmx_tx_idle_lock); - list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_idle); - kmxlnd_data.kmx_tx_used--; - spin_unlock(&kmxlnd_data.kmx_tx_idle_lock); - if (lntmsg[0] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[0], result); - if (lntmsg[1] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[1], result); - return 0; -} - -/** - * mxlnd_conn_free - free the conn - * @conn - a kmx_conn pointer - * - * The calling function should remove the conn from the conns list first - * then destroy it. - */ -void -mxlnd_conn_free(struct kmx_conn *conn) -{ - struct kmx_peer *peer = conn->mxk_peer; - - CDEBUG(D_NET, "freeing conn 0x%p *****\n", conn); - LASSERT (list_empty (&conn->mxk_tx_credit_queue) && - list_empty (&conn->mxk_tx_free_queue) && - list_empty (&conn->mxk_pending)); - if (!list_empty(&conn->mxk_list)) { - spin_lock(&peer->mxp_lock); - list_del_init(&conn->mxk_list); - if (peer->mxp_conn == conn) { - peer->mxp_conn = NULL; - if (!(conn->mxk_epa.stuff[0] == 0 && conn->mxk_epa.stuff[1] == 0)) { - mx_set_endpoint_addr_context(conn->mxk_epa, - (void *) NULL); - } - } - spin_unlock(&peer->mxp_lock); - } - mxlnd_peer_decref(conn->mxk_peer); /* drop conn's ref to peer */ - MXLND_FREE (conn, sizeof (*conn)); - return; -} - - -void -mxlnd_conn_cancel_pending_rxs(struct kmx_conn *conn) -{ - int found = 0; - struct kmx_ctx *ctx = NULL; - struct kmx_ctx *next = NULL; - mx_return_t mxret = MX_SUCCESS; - u32 result = 0; - - do { - found = 0; - spin_lock(&conn->mxk_lock); - list_for_each_entry_safe(ctx, next, &conn->mxk_pending, mxc_list) { - /* we will delete all 
including txs */ - list_del_init(&ctx->mxc_list); - if (ctx->mxc_type == MXLND_REQ_RX) { - found = 1; - mxret = mx_cancel(kmxlnd_data.kmx_endpt, - &ctx->mxc_mxreq, - &result); - if (mxret != MX_SUCCESS) { - CDEBUG(D_NETERROR, "mx_cancel() returned %s (%d)\n", mx_strerror(mxret), mxret); - } - if (result == 1) { - ctx->mxc_status.code = -ECONNABORTED; - ctx->mxc_state = MXLND_CTX_CANCELED; - /* NOTE this calls lnet_finalize() and - * we cannot hold any locks when calling it. - * It also calls mxlnd_conn_decref(conn) */ - spin_unlock(&conn->mxk_lock); - mxlnd_handle_rx_completion(ctx); - spin_lock(&conn->mxk_lock); - } - break; - } - } - spin_unlock(&conn->mxk_lock); - } - while (found); - - return; -} - -/** - * mxlnd_conn_disconnect - shutdown a connection - * @conn - a kmx_conn pointer - * - * This function sets the status to DISCONNECT, completes queued - * txs with failure, calls mx_disconnect, which will complete - * pending txs and matched rxs with failure. - */ -void -mxlnd_conn_disconnect(struct kmx_conn *conn, int mx_dis, int notify) -{ - struct list_head *tmp = NULL; - - spin_lock(&conn->mxk_lock); - if (conn->mxk_status == MXLND_CONN_DISCONNECT) { - spin_unlock(&conn->mxk_lock); - return; - } - conn->mxk_status = MXLND_CONN_DISCONNECT; - conn->mxk_timeout = 0; - - while (!list_empty(&conn->mxk_tx_free_queue) || - !list_empty(&conn->mxk_tx_credit_queue)) { - - struct kmx_ctx *tx = NULL; - - if (!list_empty(&conn->mxk_tx_free_queue)) { - tmp = &conn->mxk_tx_free_queue; - } else { - tmp = &conn->mxk_tx_credit_queue; - } - - tx = list_entry(tmp->next, struct kmx_ctx, mxc_list); - list_del_init(&tx->mxc_list); - tx->mxc_status.code = -ECONNABORTED; - spin_unlock(&conn->mxk_lock); - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); /* for this tx */ - spin_lock(&conn->mxk_lock); - } - - spin_unlock(&conn->mxk_lock); - - /* cancel pending rxs */ - mxlnd_conn_cancel_pending_rxs(conn); - - if (kmxlnd_data.kmx_shutdown != 1) { - - if (mx_dis) 
mx_disconnect(kmxlnd_data.kmx_endpt, conn->mxk_epa); - - if (notify) { - time_t last_alive = 0; - unsigned long last_msg = 0; - - /* notify LNET that we are giving up on this peer */ - if (time_after(conn->mxk_last_rx, conn->mxk_last_tx)) { - last_msg = conn->mxk_last_rx; - } else { - last_msg = conn->mxk_last_tx; - } - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - last_msg); - lnet_notify(kmxlnd_data.kmx_ni, conn->mxk_peer->mxp_nid, 0, last_alive); - } - } - mxlnd_conn_decref(conn); /* drop the owning peer's reference */ - - return; -} - -/** - * mxlnd_conn_alloc - allocate and initialize a new conn struct - * @connp - address of a kmx_conn pointer - * @peer - owning kmx_peer - * - * Returns 0 on success and -ENOMEM on failure - */ -int -mxlnd_conn_alloc(struct kmx_conn **connp, struct kmx_peer *peer) -{ - struct kmx_conn *conn = NULL; - - LASSERT(peer != NULL); - - MXLND_ALLOC(conn, sizeof (*conn)); - if (conn == NULL) { - CDEBUG(D_NETERROR, "Cannot allocate conn\n"); - return -ENOMEM; - } - CDEBUG(D_NET, "allocated conn 0x%p for peer 0x%p\n", conn, peer); - - memset(conn, 0, sizeof(*conn)); - - /* conn->mxk_incarnation = 0 - will be set by peer */ - atomic_set(&conn->mxk_refcount, 1); /* ref for owning peer */ - conn->mxk_peer = peer; - /* mxk_epa - to be set after mx_iconnect() */ - INIT_LIST_HEAD(&conn->mxk_list); - spin_lock_init(&conn->mxk_lock); - /* conn->mxk_timeout = 0 */ - conn->mxk_last_tx = jiffies; - conn->mxk_last_rx = conn->mxk_last_tx; - conn->mxk_credits = *kmxlnd_tunables.kmx_credits; - /* mxk_outstanding = 0 */ - conn->mxk_status = MXLND_CONN_INIT; - INIT_LIST_HEAD(&conn->mxk_tx_credit_queue); - INIT_LIST_HEAD(&conn->mxk_tx_free_queue); - /* conn->mxk_ntx_msgs = 0 */ - /* conn->mxk_ntx_data = 0 */ - /* conn->mxk_ntx_posted = 0 */ - /* conn->mxk_data_posted = 0 */ - INIT_LIST_HEAD(&conn->mxk_pending); - - *connp = conn; - - mxlnd_peer_addref(peer); /* add a ref for this conn */ - - /* add to front of peer's conns 
list */ - spin_lock(&peer->mxp_lock); - list_add(&conn->mxk_list, &peer->mxp_conns); - peer->mxp_conn = conn; - spin_unlock(&peer->mxp_lock); - return 0; -} - - -int -mxlnd_q_pending_ctx(struct kmx_ctx *ctx) -{ - int ret = 0; - struct kmx_conn *conn = ctx->mxc_conn; - - ctx->mxc_state = MXLND_CTX_PENDING; - if (conn != NULL) { - spin_lock(&conn->mxk_lock); - if (conn->mxk_status >= MXLND_CONN_INIT) { - list_add_tail(&ctx->mxc_list, &conn->mxk_pending); - if (conn->mxk_timeout == 0 || ctx->mxc_deadline < conn->mxk_timeout) { - conn->mxk_timeout = ctx->mxc_deadline; - } - } else { - ctx->mxc_state = MXLND_CTX_COMPLETED; - ret = -1; - } - spin_unlock(&conn->mxk_lock); - } - return ret; -} - -int -mxlnd_deq_pending_ctx(struct kmx_ctx *ctx) -{ - LASSERT(ctx->mxc_state == MXLND_CTX_PENDING || - ctx->mxc_state == MXLND_CTX_COMPLETED); - if (ctx->mxc_state != MXLND_CTX_PENDING && - ctx->mxc_state != MXLND_CTX_COMPLETED) { - CDEBUG(D_NETERROR, "deq ctx->mxc_state = %s\n", - mxlnd_ctxstate_to_str(ctx->mxc_state)); - } - ctx->mxc_state = MXLND_CTX_COMPLETED; - if (!list_empty(&ctx->mxc_list)) { - struct kmx_conn *conn = ctx->mxc_conn; - struct kmx_ctx *next = NULL; - LASSERT(conn != NULL); - spin_lock(&conn->mxk_lock); - list_del_init(&ctx->mxc_list); - conn->mxk_timeout = 0; - if (!list_empty(&conn->mxk_pending)) { - next = list_entry(conn->mxk_pending.next, struct kmx_ctx, mxc_list); - conn->mxk_timeout = next->mxc_deadline; - } - spin_unlock(&ctx->mxc_conn->mxk_lock); - } - return 0; -} - -/** - * mxlnd_peer_free - free the peer - * @peer - a kmx_peer pointer - * - * The calling function should decrement the rxs, drain the tx queues and - * remove the peer from the peers list first then destroy it. 
- */ -void -mxlnd_peer_free(struct kmx_peer *peer) -{ - CDEBUG(D_NET, "freeing peer 0x%p\n", peer); - - LASSERT (atomic_read(&peer->mxp_refcount) == 0); - - if (peer->mxp_host != NULL) { - spin_lock(&peer->mxp_host->mxh_lock); - peer->mxp_host->mxh_peer = NULL; - spin_unlock(&peer->mxp_host->mxh_lock); - } - if (!list_empty(&peer->mxp_peers)) { - /* assume we are locked */ - list_del_init(&peer->mxp_peers); - } - - MXLND_FREE (peer, sizeof (*peer)); - atomic_dec(&kmxlnd_data.kmx_npeers); - return; -} - -void -mxlnd_peer_hostname_to_nic_id(struct kmx_peer *peer) -{ - u64 nic_id = 0LL; - char name[MX_MAX_HOSTNAME_LEN + 1]; - mx_return_t mxret = MX_SUCCESS; - - memset(name, 0, sizeof(name)); - snprintf(name, sizeof(name), "%s:%d", peer->mxp_host->mxh_hostname, peer->mxp_host->mxh_board); - mxret = mx_hostname_to_nic_id(name, &nic_id); - if (mxret == MX_SUCCESS) { - peer->mxp_nic_id = nic_id; - } else { - CDEBUG(D_NETERROR, "mx_hostname_to_nic_id() failed for %s " - "with %s\n", mx_strerror(mxret), name); - mxret = mx_hostname_to_nic_id(peer->mxp_host->mxh_hostname, &nic_id); - if (mxret == MX_SUCCESS) { - peer->mxp_nic_id = nic_id; - } else { - CDEBUG(D_NETERROR, "mx_hostname_to_nic_id() failed for %s " - "with %s\n", mx_strerror(mxret), - peer->mxp_host->mxh_hostname); - } - } - return; -} - -/** - * mxlnd_peer_alloc - allocate and initialize a new peer struct - * @peerp - address of a kmx_peer pointer - * @nid - LNET node id - * - * Returns 0 on success and -ENOMEM on failure - */ -int -mxlnd_peer_alloc(struct kmx_peer **peerp, lnet_nid_t nid) -{ - int i = 0; - int ret = 0; - u32 addr = LNET_NIDADDR(nid); - struct kmx_peer *peer = NULL; - struct kmx_host *host = NULL; - - LASSERT (nid != LNET_NID_ANY && nid != 0LL); - - MXLND_ALLOC(peer, sizeof (*peer)); - if (peer == NULL) { - CDEBUG(D_NETERROR, "Cannot allocate peer for NID 0x%llx\n", nid); - return -ENOMEM; - } - CDEBUG(D_NET, "allocated peer 0x%p for NID 0x%llx\n", peer, nid); - - memset(peer, 0, sizeof(*peer)); 
- - list_for_each_entry(host, &kmxlnd_data.kmx_hosts, mxh_list) { - if (addr == host->mxh_addr) { - peer->mxp_host = host; - spin_lock(&host->mxh_lock); - host->mxh_peer = peer; - spin_unlock(&host->mxh_lock); - break; - } - } - LASSERT(peer->mxp_host != NULL); - - peer->mxp_nid = nid; - /* peer->mxp_incarnation */ - atomic_set(&peer->mxp_refcount, 1); /* ref for kmx_peers list */ - mxlnd_peer_hostname_to_nic_id(peer); - - INIT_LIST_HEAD(&peer->mxp_peers); - spin_lock_init(&peer->mxp_lock); - INIT_LIST_HEAD(&peer->mxp_conns); - ret = mxlnd_conn_alloc(&peer->mxp_conn, peer); - if (ret != 0) { - mxlnd_peer_decref(peer); - return ret; - } - - for (i = 0; i < *kmxlnd_tunables.kmx_credits - 1; i++) { - struct kmx_ctx *rx = NULL; - ret = mxlnd_ctx_alloc(&rx, MXLND_REQ_RX); - if (ret != 0) { - mxlnd_reduce_idle_rxs(i); - mxlnd_peer_decref(peer); - return ret; - } - spin_lock(&kmxlnd_data.kmx_rxs_lock); - list_add_tail(&rx->mxc_global_list, &kmxlnd_data.kmx_rxs); - spin_unlock(&kmxlnd_data.kmx_rxs_lock); - rx->mxc_put = -1; - mxlnd_put_idle_rx(rx); - } - /* peer->mxp_reconnect_time = 0 */ - /* peer->mxp_incompatible = 0 */ - - *peerp = peer; - return 0; -} - -/** - * mxlnd_nid_to_hash - hash the nid - * @nid - msg pointer - * - * Takes the u64 nid and XORs the lowest N bits by the next lowest N bits. - */ -static inline int -mxlnd_nid_to_hash(lnet_nid_t nid) -{ - return (nid & MXLND_HASH_MASK) ^ - ((nid & (MXLND_HASH_MASK << MXLND_HASH_BITS)) >> MXLND_HASH_BITS); -} - -static inline struct kmx_peer * -mxlnd_find_peer_by_nid(lnet_nid_t nid) -{ - int found = 0; - int hash = 0; - struct kmx_peer *peer = NULL; - - hash = mxlnd_nid_to_hash(nid); - - read_lock(&kmxlnd_data.kmx_peers_lock); - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[hash], mxp_peers) { - if (peer->mxp_nid == nid) { - found = 1; - break; - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - return (found ? 
peer : NULL); -} - -static inline int -mxlnd_tx_requires_credit(struct kmx_ctx *tx) -{ - return (tx->mxc_msg_type == MXLND_MSG_EAGER || - tx->mxc_msg_type == MXLND_MSG_GET_REQ || - tx->mxc_msg_type == MXLND_MSG_PUT_REQ || - tx->mxc_msg_type == MXLND_MSG_NOOP); -} - -/** - * mxlnd_init_msg - set type and number of bytes - * @msg - msg pointer - * @type - of message - * @body_nob - bytes in msg body - */ -static inline void -mxlnd_init_msg(kmx_msg_t *msg, u8 type, int body_nob) -{ - msg->mxm_type = type; - msg->mxm_nob = offsetof(kmx_msg_t, mxm_u) + body_nob; -} - -static inline void -mxlnd_init_tx_msg (struct kmx_ctx *tx, u8 type, int body_nob, lnet_nid_t nid) -{ - int nob = offsetof (kmx_msg_t, mxm_u) + body_nob; - struct kmx_msg *msg = NULL; - - LASSERT (tx != NULL); - LASSERT (nob <= MXLND_EAGER_SIZE); - - tx->mxc_nid = nid; - /* tx->mxc_peer should have already been set if we know it */ - tx->mxc_msg_type = type; - tx->mxc_nseg = 1; - /* tx->mxc_seg.segment_ptr is already pointing to mxc_page */ - tx->mxc_seg.segment_length = nob; - tx->mxc_pin_type = MX_PIN_PHYSICAL; - //tx->mxc_state = MXLND_CTX_PENDING; - - msg = tx->mxc_msg; - msg->mxm_type = type; - msg->mxm_nob = nob; - - return; -} - -static inline __u32 -mxlnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -/** - * mxlnd_pack_msg - complete msg info - * @tx - msg to send - */ -static inline void -mxlnd_pack_msg(struct kmx_ctx *tx) -{ - struct kmx_msg *msg = tx->mxc_msg; - - /* type and nob should already be set in init_msg() */ - msg->mxm_magic = MXLND_MSG_MAGIC; - msg->mxm_version = MXLND_MSG_VERSION; - /* mxm_type */ - /* don't use mxlnd_tx_requires_credit() since we want PUT_ACK to - * return credits as well */ - if (tx->mxc_msg_type != MXLND_MSG_CONN_REQ && - tx->mxc_msg_type != MXLND_MSG_CONN_ACK) { - spin_lock(&tx->mxc_conn->mxk_lock); - msg->mxm_credits = tx->mxc_conn->mxk_outstanding; - tx->mxc_conn->mxk_outstanding = 0; - spin_unlock(&tx->mxc_conn->mxk_lock); - } else { - msg->mxm_credits = 0; - } - /* mxm_nob */ - msg->mxm_cksum = 0; - msg->mxm_srcnid = lnet_ptlcompat_srcnid(kmxlnd_data.kmx_ni->ni_nid, tx->mxc_nid); - msg->mxm_srcstamp = kmxlnd_data.kmx_incarnation; - msg->mxm_dstnid = tx->mxc_nid; - /* if it is a new peer, the dststamp will be 0 */ - msg->mxm_dststamp = tx->mxc_conn->mxk_incarnation; - msg->mxm_seq = tx->mxc_cookie; - - if (*kmxlnd_tunables.kmx_cksum) { - msg->mxm_cksum = mxlnd_cksum(msg, msg->mxm_nob); - } -} - -int -mxlnd_unpack_msg(kmx_msg_t *msg, int nob) -{ - const int hdr_size = offsetof(kmx_msg_t, mxm_u); - __u32 msg_cksum = 0; - int flip = 0; - int msg_nob = 0; - - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CDEBUG(D_NETERROR, "not enough bytes for magic + hdr: %d\n", nob); - return -EPROTO; - } - - if (msg->mxm_magic == MXLND_MSG_MAGIC) { - flip = 0; - } else if (msg->mxm_magic == __swab32(MXLND_MSG_MAGIC)) { - flip = 1; - } else { - CDEBUG(D_NETERROR, "Bad magic: %08x\n", msg->mxm_magic); - return -EPROTO; - } - - if (msg->mxm_version != - (flip ? 
__swab16(MXLND_MSG_VERSION) : MXLND_MSG_VERSION)) { - CDEBUG(D_NETERROR, "Bad version: %d\n", msg->mxm_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CDEBUG(D_NETERROR, "not enough for a header: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->mxm_nob) : msg->mxm_nob; - if (msg_nob > nob) { - CDEBUG(D_NETERROR, "Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with mxm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? __swab32(msg->mxm_cksum) : msg->mxm_cksum; - msg->mxm_cksum = 0; - if (msg_cksum != 0 && msg_cksum != mxlnd_cksum(msg, msg_nob)) { - CDEBUG(D_NETERROR, "Bad checksum\n"); - return -EPROTO; - } - msg->mxm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - __swab16s(&msg->mxm_version); - CLASSERT (sizeof(msg->mxm_type) == 1); - CLASSERT (sizeof(msg->mxm_credits) == 1); - msg->mxm_nob = msg_nob; - __swab64s(&msg->mxm_srcnid); - __swab64s(&msg->mxm_srcstamp); - __swab64s(&msg->mxm_dstnid); - __swab64s(&msg->mxm_dststamp); - __swab64s(&msg->mxm_seq); - } - - if (msg->mxm_srcnid == LNET_NID_ANY) { - CDEBUG(D_NETERROR, "Bad src nid: %s\n", libcfs_nid2str(msg->mxm_srcnid)); - return -EPROTO; - } - - switch (msg->mxm_type) { - default: - CDEBUG(D_NETERROR, "Unknown message type %x\n", msg->mxm_type); - return -EPROTO; - - case MXLND_MSG_NOOP: - break; - - case MXLND_MSG_EAGER: - if (msg_nob < offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[0])) { - CDEBUG(D_NETERROR, "Short EAGER: %d(%d)\n", msg_nob, - (int)offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[0])); - return -EPROTO; - } - break; - - case MXLND_MSG_PUT_REQ: - if (msg_nob < hdr_size + sizeof(msg->mxm_u.put_req)) { - CDEBUG(D_NETERROR, "Short PUT_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->mxm_u.put_req))); - return -EPROTO; - } - if (flip) - __swab64s(&msg->mxm_u.put_req.mxprm_cookie); - break; - - case MXLND_MSG_PUT_ACK: - if (msg_nob < 
hdr_size + sizeof(msg->mxm_u.put_ack)) { - CDEBUG(D_NETERROR, "Short PUT_ACK: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->mxm_u.put_ack))); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->mxm_u.put_ack.mxpam_src_cookie); - __swab64s(&msg->mxm_u.put_ack.mxpam_dst_cookie); - } - break; - - case MXLND_MSG_GET_REQ: - if (msg_nob < hdr_size + sizeof(msg->mxm_u.get_req)) { - CDEBUG(D_NETERROR, "Short GET_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->mxm_u.get_req))); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->mxm_u.get_req.mxgrm_cookie); - } - break; - - case MXLND_MSG_CONN_REQ: - case MXLND_MSG_CONN_ACK: - if (msg_nob < hdr_size + sizeof(msg->mxm_u.conn_req)) { - CDEBUG(D_NETERROR, "Short connreq/ack: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->mxm_u.conn_req))); - return -EPROTO; - } - if (flip) { - __swab32s(&msg->mxm_u.conn_req.mxcrm_queue_depth); - __swab32s(&msg->mxm_u.conn_req.mxcrm_eager_size); - } - break; - } - return 0; -} - -/** - * mxlnd_recv_msg - * @lntmsg - the LNET msg that this is continuing. If EAGER, then NULL. - * @rx - * @msg_type - * @cookie - * @length - length of incoming message - * @pending - add to kmx_pending (0 is NO and 1 is YES) - * - * The caller gets the rx and sets nid, peer and conn if known. - * - * Returns 0 on success and -1 on failure - */ -int -mxlnd_recv_msg(lnet_msg_t *lntmsg, struct kmx_ctx *rx, u8 msg_type, u64 cookie, u32 length) -{ - int ret = 0; - mx_return_t mxret = MX_SUCCESS; - uint64_t mask = 0xF00FFFFFFFFFFFFFLL; - - rx->mxc_msg_type = msg_type; - rx->mxc_lntmsg[0] = lntmsg; /* may be NULL if EAGER */ - rx->mxc_cookie = cookie; - /* rx->mxc_match may already be set */ - /* rx->mxc_seg.segment_ptr is already set */ - rx->mxc_seg.segment_length = length; - rx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT; - ret = mxlnd_q_pending_ctx(rx); - if (ret == -1) { - /* FIXME the conn is disconnected, now what? 
*/ - return -1; - } - mxret = mx_kirecv(kmxlnd_data.kmx_endpt, &rx->mxc_seg, 1, MX_PIN_PHYSICAL, - cookie, mask, (void *) rx, &rx->mxc_mxreq); - if (mxret != MX_SUCCESS) { - mxlnd_deq_pending_ctx(rx); - CDEBUG(D_NETERROR, "mx_kirecv() failed with %s (%d)\n", - mx_strerror(mxret), (int) mxret); - return -1; - } - return 0; -} - - -/** - * mxlnd_unexpected_recv - this is the callback function that will handle - * unexpected receives - * @context - NULL, ignore - * @source - the peer's mx_endpoint_addr_t - * @match_value - the msg's bit, should be MXLND_MASK_EAGER - * @length - length of incoming message - * @data_if_available - ignore - * - * If it is an eager-sized msg, we will call recv_msg() with the actual - * length. If it is a large message, we will call recv_msg() with a - * length of 0 bytes to drop it because we should never have a large, - * unexpected message. - * - * NOTE - The MX library blocks until this function completes. Make it as fast as - * possible. DO NOT allocate memory which can block! - * - * If we cannot get a rx or the conn is closed, drop the message on the floor - * (i.e. recv 0 bytes and ignore). 
- */ -mx_unexp_handler_action_t -mxlnd_unexpected_recv(void *context, mx_endpoint_addr_t source, - uint64_t match_value, uint32_t length, void *data_if_available) -{ - int ret = 0; - struct kmx_ctx *rx = NULL; - mx_ksegment_t seg; - u8 msg_type = 0; - u8 error = 0; - u64 cookie = 0LL; - - if (context != NULL) { - CDEBUG(D_NETERROR, "unexpected receive with non-NULL context\n"); - } - -#if MXLND_DEBUG - CDEBUG(D_NET, "unexpected_recv() bits=0x%llx length=%d\n", match_value, length); -#endif - - rx = mxlnd_get_idle_rx(); - if (rx != NULL) { - mxlnd_parse_match(match_value, &msg_type, &error, &cookie); - if (length <= MXLND_EAGER_SIZE) { - ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, length); - } else { - CDEBUG(D_NETERROR, "unexpected large receive with " - "match_value=0x%llx length=%d\n", - match_value, length); - ret = mxlnd_recv_msg(NULL, rx, msg_type, match_value, 0); - } - if (ret == 0) { - struct kmx_conn *conn = NULL; - mx_get_endpoint_addr_context(source, (void **) &conn); - if (conn != NULL) { - mxlnd_conn_addref(conn); - rx->mxc_conn = conn; - rx->mxc_peer = conn->mxk_peer; - if (conn->mxk_peer != NULL) { - rx->mxc_nid = conn->mxk_peer->mxp_nid; - } else { - CDEBUG(D_NETERROR, "conn is 0x%p and peer " - "is NULL\n", conn); - } - } - } else { - CDEBUG(D_NETERROR, "could not post receive\n"); - mxlnd_put_idle_rx(rx); - } - } - - if (rx == NULL || ret != 0) { - if (rx == NULL) { - CDEBUG(D_NETERROR, "no idle rxs available - dropping rx\n"); - } else { - /* ret != 0 */ - CDEBUG(D_NETERROR, "disconnected peer - dropping rx\n"); - } - seg.segment_ptr = 0LL; - seg.segment_length = 0; - mx_kirecv(kmxlnd_data.kmx_endpt, &seg, 1, MX_PIN_PHYSICAL, - match_value, 0xFFFFFFFFFFFFFFFFLL, NULL, NULL); - } - - return MX_RECV_CONTINUE; -} - - -int -mxlnd_get_peer_info(int index, lnet_nid_t *nidp, int *count) -{ - int i = 0; - int ret = -ENOENT; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - for (i = 0; 
i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) { - conn = peer->mxp_conn; - if (index-- > 0) - continue; - - *nidp = peer->mxp_nid; - *count = atomic_read(&peer->mxp_refcount); - ret = 0; - break; - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - - return ret; -} - -void -mxlnd_del_peer_locked(struct kmx_peer *peer) -{ - list_del_init(&peer->mxp_peers); /* remove from the global list */ - if (peer->mxp_conn) mxlnd_conn_disconnect(peer->mxp_conn, 0, 0); - mxlnd_peer_decref(peer); /* drop global list ref */ - return; -} - -int -mxlnd_del_peer(lnet_nid_t nid) -{ - int i = 0; - int ret = 0; - struct kmx_peer *peer = NULL; - struct kmx_peer *next = NULL; - - if (nid != LNET_NID_ANY) { - peer = mxlnd_find_peer_by_nid(nid); - } - write_lock(&kmxlnd_data.kmx_peers_lock); - if (nid != LNET_NID_ANY) { - if (peer == NULL) { - ret = -ENOENT; - } else { - mxlnd_del_peer_locked(peer); - } - } else { /* LNET_NID_ANY */ - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry_safe(peer, next, - &kmxlnd_data.kmx_peers[i], mxp_peers) { - mxlnd_del_peer_locked(peer); - } - } - } - write_unlock(&kmxlnd_data.kmx_peers_lock); - - return ret; -} - -struct kmx_conn * -mxlnd_get_conn_by_idx(int index) -{ - int i = 0; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) { - list_for_each_entry(conn, &peer->mxp_conns, mxk_list) { - if (index-- > 0) - continue; - - mxlnd_conn_addref(conn); /* add ref here, dec in ctl() */ - read_unlock(&kmxlnd_data.kmx_peers_lock); - return conn; - } - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - - return NULL; -} - -void -mxlnd_close_matching_conns_locked(struct kmx_peer *peer) -{ - struct kmx_conn *conn = NULL; - struct kmx_conn *next = NULL; - - list_for_each_entry_safe(conn, next, &peer->mxp_conns, mxk_list) { - 
mxlnd_conn_disconnect(conn, 0 , 0); - } - return; -} - -int -mxlnd_close_matching_conns(lnet_nid_t nid) -{ - int i = 0; - int ret = 0; - struct kmx_peer *peer = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - if (nid != LNET_NID_ANY) { - peer = mxlnd_find_peer_by_nid(nid); - if (peer == NULL) { - ret = -ENOENT; - } else { - mxlnd_close_matching_conns_locked(peer); - } - } else { /* LNET_NID_ANY */ - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) - mxlnd_close_matching_conns_locked(peer); - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - - return ret; -} - -/** - * mxlnd_ctl - modify MXLND parameters - * @ni - LNET interface handle - * @cmd - command to change - * @arg - the ioctl data - * - * Not implemented yet. - */ -int -mxlnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int ret = -EINVAL; - - LASSERT (ni == kmxlnd_data.kmx_ni); - - switch (cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - int count = 0; - - ret = mxlnd_get_peer_info(data->ioc_count, &nid, &count); - data->ioc_nid = nid; - data->ioc_count = count; - break; - } - case IOC_LIBCFS_DEL_PEER: { - ret = mxlnd_del_peer(data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - struct kmx_conn *conn = NULL; - - conn = mxlnd_get_conn_by_idx(data->ioc_count); - if (conn == NULL) { - ret = -ENOENT; - } else { - ret = 0; - data->ioc_nid = conn->mxk_peer->mxp_nid; - mxlnd_conn_decref(conn); /* dec ref taken in get_conn_by_idx() */ - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - ret = mxlnd_close_matching_conns(data->ioc_nid); - break; - } - default: - CDEBUG(D_NETERROR, "unknown ctl(%d)\n", cmd); - break; - } - - return ret; -} - -/** - * mxlnd_peer_queue_tx_locked - add the tx to the global tx queue - * @tx - * - * Add the tx to the peer's msg or data queue. The caller has locked the peer. 
- */ -void -mxlnd_peer_queue_tx_locked(struct kmx_ctx *tx) -{ - u8 msg_type = tx->mxc_msg_type; - //struct kmx_peer *peer = tx->mxc_peer; - struct kmx_conn *conn = tx->mxc_conn; - - LASSERT (msg_type != 0); - LASSERT (tx->mxc_nid != 0); - LASSERT (tx->mxc_peer != NULL); - LASSERT (tx->mxc_conn != NULL); - - tx->mxc_incarnation = conn->mxk_incarnation; - - if (msg_type != MXLND_MSG_PUT_DATA && - msg_type != MXLND_MSG_GET_DATA) { - /* msg style tx */ - if (mxlnd_tx_requires_credit(tx)) { - list_add_tail(&tx->mxc_list, &conn->mxk_tx_credit_queue); - conn->mxk_ntx_msgs++; - } else if (msg_type == MXLND_MSG_CONN_REQ || - msg_type == MXLND_MSG_CONN_ACK) { - /* put conn msgs at the front of the queue */ - list_add(&tx->mxc_list, &conn->mxk_tx_free_queue); - } else { - /* PUT_ACK, PUT_NAK */ - list_add_tail(&tx->mxc_list, &conn->mxk_tx_free_queue); - conn->mxk_ntx_msgs++; - } - } else { - /* data style tx */ - list_add_tail(&tx->mxc_list, &conn->mxk_tx_free_queue); - conn->mxk_ntx_data++; - } - - return; -} - -/** - * mxlnd_peer_queue_tx - add the tx to the global tx queue - * @tx - * - * Add the tx to the peer's msg or data queue - */ -static inline void -mxlnd_peer_queue_tx(struct kmx_ctx *tx) -{ - LASSERT(tx->mxc_peer != NULL); - LASSERT(tx->mxc_conn != NULL); - spin_lock(&tx->mxc_conn->mxk_lock); - mxlnd_peer_queue_tx_locked(tx); - spin_unlock(&tx->mxc_conn->mxk_lock); - - return; -} - -/** - * mxlnd_queue_tx - add the tx to the global tx queue - * @tx - * - * Add the tx to the global queue and up the tx_queue_sem - */ -void -mxlnd_queue_tx(struct kmx_ctx *tx) -{ - int ret = 0; - struct kmx_peer *peer = tx->mxc_peer; - LASSERT (tx->mxc_nid != 0); - - if (peer != NULL) { - if (peer->mxp_incompatible && - tx->mxc_msg_type != MXLND_MSG_CONN_ACK) { - /* let this fail now */ - tx->mxc_status.code = -ECONNABORTED; - mxlnd_put_idle_tx(tx); - return; - } - if (tx->mxc_conn == NULL) { - mxlnd_conn_alloc(&tx->mxc_conn, peer); - } - LASSERT(tx->mxc_conn != NULL); - 
mxlnd_peer_queue_tx(tx); - ret = mxlnd_check_sends(peer); - } else { - spin_lock(&kmxlnd_data.kmx_tx_queue_lock); - list_add_tail(&tx->mxc_list, &kmxlnd_data.kmx_tx_queue); - spin_unlock(&kmxlnd_data.kmx_tx_queue_lock); - up(&kmxlnd_data.kmx_tx_queue_sem); - } - return; -} - -int -mxlnd_setup_iov(struct kmx_ctx *ctx, u32 niov, struct iovec *iov, u32 offset, u32 nob) -{ - int i = 0; - int sum = 0; - int old_sum = 0; - int nseg = 0; - int first_iov = -1; - int first_iov_offset = 0; - int first_found = 0; - int last_iov = -1; - int last_iov_length = 0; - mx_ksegment_t *seg = NULL; - - if (niov == 0) return 0; - LASSERT(iov != NULL); - - for (i = 0; i < niov; i++) { - sum = old_sum + (u32) iov[i].iov_len; - if (!first_found && (sum > offset)) { - first_iov = i; - first_iov_offset = offset - old_sum; - first_found = 1; - sum = (u32) iov[i].iov_len - first_iov_offset; - old_sum = 0; - } - if (sum >= nob) { - last_iov = i; - last_iov_length = (u32) iov[i].iov_len - (sum - nob); - if (first_iov == last_iov) last_iov_length -= first_iov_offset; - break; - } - old_sum = sum; - } - LASSERT(first_iov >= 0 && last_iov >= first_iov); - nseg = last_iov - first_iov + 1; - LASSERT(nseg > 0); - - MXLND_ALLOC (seg, nseg * sizeof(*seg)); - if (seg == NULL) { - CDEBUG(D_NETERROR, "MXLND_ALLOC() failed\n"); - return -1; - } - memset(seg, 0, nseg * sizeof(*seg)); - ctx->mxc_nseg = nseg; - sum = 0; - for (i = 0; i < nseg; i++) { - seg[i].segment_ptr = MX_KVA_TO_U64(iov[first_iov + i].iov_base); - seg[i].segment_length = (u32) iov[first_iov + i].iov_len; - if (i == 0) { - seg[i].segment_ptr += (u64) first_iov_offset; - seg[i].segment_length -= (u32) first_iov_offset; - } - if (i == (nseg - 1)) { - seg[i].segment_length = (u32) last_iov_length; - } - sum += seg[i].segment_length; - } - ctx->mxc_seg_list = seg; - ctx->mxc_pin_type = MX_PIN_KERNEL; -#ifdef MX_PIN_FULLPAGES - ctx->mxc_pin_type |= MX_PIN_FULLPAGES; -#endif - LASSERT(nob == sum); - return 0; -} - -int -mxlnd_setup_kiov(struct 
kmx_ctx *ctx, u32 niov, lnet_kiov_t *kiov, u32 offset, u32 nob) -{ - int i = 0; - int sum = 0; - int old_sum = 0; - int nseg = 0; - int first_kiov = -1; - int first_kiov_offset = 0; - int first_found = 0; - int last_kiov = -1; - int last_kiov_length = 0; - mx_ksegment_t *seg = NULL; - - if (niov == 0) return 0; - LASSERT(kiov != NULL); - - for (i = 0; i < niov; i++) { - sum = old_sum + kiov[i].kiov_len; - if (i == 0) sum -= kiov[i].kiov_offset; - if (!first_found && (sum > offset)) { - first_kiov = i; - first_kiov_offset = offset - old_sum; - //if (i == 0) first_kiov_offset + kiov[i].kiov_offset; - if (i == 0) first_kiov_offset = kiov[i].kiov_offset; - first_found = 1; - sum = kiov[i].kiov_len - first_kiov_offset; - old_sum = 0; - } - if (sum >= nob) { - last_kiov = i; - last_kiov_length = kiov[i].kiov_len - (sum - nob); - if (first_kiov == last_kiov) last_kiov_length -= first_kiov_offset; - break; - } - old_sum = sum; - } - LASSERT(first_kiov >= 0 && last_kiov >= first_kiov); - nseg = last_kiov - first_kiov + 1; - LASSERT(nseg > 0); - - MXLND_ALLOC (seg, nseg * sizeof(*seg)); - if (seg == NULL) { - CDEBUG(D_NETERROR, "MXLND_ALLOC() failed\n"); - return -1; - } - memset(seg, 0, niov * sizeof(*seg)); - ctx->mxc_nseg = niov; - sum = 0; - for (i = 0; i < niov; i++) { - seg[i].segment_ptr = lnet_page2phys(kiov[first_kiov + i].kiov_page); - seg[i].segment_length = kiov[first_kiov + i].kiov_len; - if (i == 0) { - seg[i].segment_ptr += (u64) first_kiov_offset; - /* we have to add back the original kiov_offset */ - seg[i].segment_length -= first_kiov_offset + - kiov[first_kiov].kiov_offset; - } - if (i == (nseg - 1)) { - seg[i].segment_length = last_kiov_length; - } - sum += seg[i].segment_length; - } - ctx->mxc_seg_list = seg; - ctx->mxc_pin_type = MX_PIN_PHYSICAL; -#ifdef MX_PIN_FULLPAGES - ctx->mxc_pin_type |= MX_PIN_FULLPAGES; -#endif - LASSERT(nob == sum); - return 0; -} - -void -mxlnd_send_nak(struct kmx_ctx *tx, lnet_nid_t nid, int type, int status, __u64 cookie) -{ 
- LASSERT(type == MXLND_MSG_PUT_ACK); - mxlnd_init_tx_msg(tx, type, sizeof(kmx_putack_msg_t), tx->mxc_nid); - tx->mxc_cookie = cookie; - tx->mxc_msg->mxm_u.put_ack.mxpam_src_cookie = cookie; - tx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie = ((u64) status << 52); /* error code */ - tx->mxc_match = mxlnd_create_match(tx, status); - - mxlnd_queue_tx(tx); -} - - -/** - * mxlnd_send_data - get tx, map [k]iov, queue tx - * @ni - * @lntmsg - * @peer - * @msg_type - * @cookie - * - * This setups the DATA send for PUT or GET. - * - * On success, it queues the tx, on failure it calls lnet_finalize() - */ -void -mxlnd_send_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, struct kmx_peer *peer, u8 msg_type, u64 cookie) -{ - int ret = 0; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - struct kmx_ctx *tx = NULL; - - LASSERT(lntmsg != NULL); - LASSERT(peer != NULL); - LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA); - LASSERT((cookie>>52) == 0); - - tx = mxlnd_get_idle_tx(); - if (tx == NULL) { - CDEBUG(D_NETERROR, "Can't allocate %s tx for %s\n", - msg_type == MXLND_MSG_PUT_DATA ? 
"PUT_DATA" : "GET_DATA", - libcfs_nid2str(target.nid)); - goto failed_0; - } - tx->mxc_nid = target.nid; - mxlnd_conn_addref(peer->mxp_conn); - tx->mxc_peer = peer; - tx->mxc_conn = peer->mxp_conn; - tx->mxc_msg_type = msg_type; - tx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT; - tx->mxc_state = MXLND_CTX_PENDING; - tx->mxc_lntmsg[0] = lntmsg; - tx->mxc_cookie = cookie; - tx->mxc_match = mxlnd_create_match(tx, 0); - - /* This setups up the mx_ksegment_t to send the DATA payload */ - if (nob == 0) { - /* do not setup the segments */ - CDEBUG(D_NETERROR, "nob = 0; why didn't we use an EAGER reply " - "to %s?\n", libcfs_nid2str(target.nid)); - ret = 0; - } else if (kiov == NULL) { - ret = mxlnd_setup_iov(tx, niov, iov, offset, nob); - } else { - ret = mxlnd_setup_kiov(tx, niov, kiov, offset, nob); - } - if (ret != 0) { - CDEBUG(D_NETERROR, "Can't setup send DATA for %s\n", - libcfs_nid2str(target.nid)); - tx->mxc_status.code = -EIO; - goto failed_1; - } - mxlnd_queue_tx(tx); - return; - -failed_1: - mxlnd_conn_decref(peer->mxp_conn); - mxlnd_put_idle_tx(tx); - return; - -failed_0: - CDEBUG(D_NETERROR, "no tx avail\n"); - lnet_finalize(ni, lntmsg, -EIO); - return; -} - -/** - * mxlnd_recv_data - map [k]iov, post rx - * @ni - * @lntmsg - * @rx - * @msg_type - * @cookie - * - * This setups the DATA receive for PUT or GET. 
- * - * On success, it returns 0, on failure it returns -1 - */ -int -mxlnd_recv_data(lnet_ni_t *ni, lnet_msg_t *lntmsg, struct kmx_ctx *rx, u8 msg_type, u64 cookie) -{ - int ret = 0; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - mx_return_t mxret = MX_SUCCESS; - - /* above assumes MXLND_MSG_PUT_DATA */ - if (msg_type == MXLND_MSG_GET_DATA) { - niov = lntmsg->msg_md->md_niov; - iov = lntmsg->msg_md->md_iov.iov; - kiov = lntmsg->msg_md->md_iov.kiov; - offset = 0; - nob = lntmsg->msg_md->md_length; - } - - LASSERT(lntmsg != NULL); - LASSERT(rx != NULL); - LASSERT(msg_type == MXLND_MSG_PUT_DATA || msg_type == MXLND_MSG_GET_DATA); - LASSERT((cookie>>52) == 0); /* ensure top 12 bits are 0 */ - - rx->mxc_msg_type = msg_type; - rx->mxc_deadline = jiffies + MXLND_COMM_TIMEOUT; - rx->mxc_state = MXLND_CTX_PENDING; - rx->mxc_nid = target.nid; - /* if posting a GET_DATA, we may not yet know the peer */ - if (rx->mxc_peer != NULL) { - rx->mxc_conn = rx->mxc_peer->mxp_conn; - } - rx->mxc_lntmsg[0] = lntmsg; - rx->mxc_cookie = cookie; - rx->mxc_match = mxlnd_create_match(rx, 0); - /* This setups up the mx_ksegment_t to receive the DATA payload */ - if (kiov == NULL) { - ret = mxlnd_setup_iov(rx, niov, iov, offset, nob); - } else { - ret = mxlnd_setup_kiov(rx, niov, kiov, offset, nob); - } - if (msg_type == MXLND_MSG_GET_DATA) { - rx->mxc_lntmsg[1] = lnet_create_reply_msg(kmxlnd_data.kmx_ni, lntmsg); - if (rx->mxc_lntmsg[1] == NULL) { - CDEBUG(D_NETERROR, "Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - ret = -1; - } - } - if (ret != 0) { - CDEBUG(D_NETERROR, "Can't setup %s rx for %s\n", - msg_type == MXLND_MSG_PUT_DATA ? 
"PUT_DATA" : "GET_DATA", - libcfs_nid2str(target.nid)); - return -1; - } - ret = mxlnd_q_pending_ctx(rx); - if (ret == -1) { - return -1; - } - CDEBUG(D_NET, "receiving %s 0x%llx\n", mxlnd_msgtype_to_str(msg_type), rx->mxc_cookie); - mxret = mx_kirecv(kmxlnd_data.kmx_endpt, - rx->mxc_seg_list, rx->mxc_nseg, - rx->mxc_pin_type, rx->mxc_match, - 0xF00FFFFFFFFFFFFFLL, (void *) rx, - &rx->mxc_mxreq); - if (mxret != MX_SUCCESS) { - if (rx->mxc_conn != NULL) { - mxlnd_deq_pending_ctx(rx); - } - CDEBUG(D_NETERROR, "mx_kirecv() failed with %d for %s\n", - (int) mxret, libcfs_nid2str(target.nid)); - return -1; - } - - return 0; -} - -/** - * mxlnd_send - the LND required send function - * @ni - * @private - * @lntmsg - * - * This must not block. Since we may not have a peer struct for the receiver, - * it will append send messages on a global tx list. We will then up the - * tx_queued's semaphore to notify it of the new send. - */ -int -mxlnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - int ret = 0; - int type = lntmsg->msg_type; - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - lnet_process_id_t target = lntmsg->msg_target; - lnet_nid_t nid = target.nid; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - struct kmx_ctx *tx = NULL; - struct kmx_msg *txmsg = NULL; - struct kmx_ctx *rx = (struct kmx_ctx *) private; /* for REPLY */ - struct kmx_ctx *rx_data = NULL; - struct kmx_conn *conn = NULL; - int nob = 0; - uint32_t length = 0; - struct kmx_peer *peer = NULL; - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - /* payload is either all 
vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - /* private is used on LNET_GET_REPLY only, NULL for all other cases */ - - /* NOTE we may not know the peer if it is the very first PUT_REQ or GET_REQ - * to a new peer, use the nid */ - peer = mxlnd_find_peer_by_nid(nid); - if (peer != NULL) { - conn = peer->mxp_conn; - if (conn) mxlnd_conn_addref(conn); - } - if (conn == NULL && peer != NULL) { - CDEBUG(D_NETERROR, "conn==NULL peer=0x%p nid=0x%llx payload_nob=%d type=%s\n", - peer, nid, payload_nob, ((type==LNET_MSG_PUT) ? "PUT" : - ((type==LNET_MSG_GET) ? "GET" : "Other"))); - } - - switch (type) { - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need DATA? */ - nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[payload_nob]); - if (nob <= MXLND_EAGER_SIZE) - break; /* send EAGER */ - - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate %s tx for %s\n", - type == LNET_MSG_PUT ? "PUT" : "REPLY", - libcfs_nid2str(nid)); - if (conn) mxlnd_conn_decref(conn); - return -ENOMEM; - } - - /* the peer may be NULL */ - tx->mxc_peer = peer; - tx->mxc_conn = conn; /* may be NULL */ - /* we added a conn ref above */ - mxlnd_init_tx_msg (tx, MXLND_MSG_PUT_REQ, sizeof(kmx_putreq_msg_t), nid); - txmsg = tx->mxc_msg; - txmsg->mxm_u.put_req.mxprm_hdr = *hdr; - txmsg->mxm_u.put_req.mxprm_cookie = tx->mxc_cookie; - tx->mxc_match = mxlnd_create_match(tx, 0); - - /* we must post a receive _before_ sending the request. - * we need to determine how much to receive, it will be either - * a put_ack or a put_nak. The put_ack is larger, so use it. 
*/ - - rx = mxlnd_get_idle_rx(); - if (unlikely(rx == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate rx for PUT_ACK for %s\n", - libcfs_nid2str(nid)); - mxlnd_put_idle_tx(tx); - if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */ - return -ENOMEM; - } - rx->mxc_nid = nid; - rx->mxc_peer = peer; - /* conn may be NULL but unlikely since the first msg is always small */ - if (conn) mxlnd_conn_addref(conn); /* for this rx */ - rx->mxc_conn = conn; - rx->mxc_msg_type = MXLND_MSG_PUT_ACK; - rx->mxc_cookie = tx->mxc_cookie; - rx->mxc_match = mxlnd_create_match(rx, 0); - - length = offsetof(kmx_msg_t, mxm_u) + sizeof(kmx_putack_msg_t); - ret = mxlnd_recv_msg(lntmsg, rx, MXLND_MSG_PUT_ACK, rx->mxc_match, length); - if (unlikely(ret != 0)) { - CDEBUG(D_NETERROR, "recv_msg() failed for PUT_ACK for %s\n", - libcfs_nid2str(nid)); - rx->mxc_lntmsg[0] = NULL; - mxlnd_put_idle_rx(rx); - mxlnd_put_idle_tx(tx); - if (conn) { - mxlnd_conn_decref(conn); /* for the rx... */ - mxlnd_conn_decref(conn); /* and for the tx */ - } - return -ENOMEM; - } - - mxlnd_queue_tx(tx); - return 0; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send EAGER */ - - /* is the REPLY message too small for DATA? 
*/ - nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[lntmsg->msg_md->md_length]); - if (nob <= MXLND_EAGER_SIZE) - break; /* send EAGER */ - - /* get tx (we need the cookie) , post rx for incoming DATA, - * then post GET_REQ tx */ - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate GET tx for %s\n", - libcfs_nid2str(nid)); - if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */ - return -ENOMEM; - } - rx_data = mxlnd_get_idle_rx(); - if (unlikely(rx_data == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate DATA rx for %s\n", - libcfs_nid2str(nid)); - mxlnd_put_idle_tx(tx); - if (conn) mxlnd_conn_decref(conn); /* for the ref taken above */ - return -ENOMEM; - } - rx_data->mxc_peer = peer; - if (conn) mxlnd_conn_addref(conn); /* for the rx_data */ - rx_data->mxc_conn = conn; /* may be NULL */ - - ret = mxlnd_recv_data(ni, lntmsg, rx_data, MXLND_MSG_GET_DATA, tx->mxc_cookie); - if (unlikely(ret != 0)) { - CDEBUG(D_NETERROR, "Can't setup GET sink for %s\n", - libcfs_nid2str(nid)); - mxlnd_put_idle_rx(rx_data); - mxlnd_put_idle_tx(tx); - if (conn) { - mxlnd_conn_decref(conn); /* for the rx_data... 
*/ - mxlnd_conn_decref(conn); /* and for the tx */ - } - return -EIO; - } - - tx->mxc_peer = peer; - tx->mxc_conn = conn; /* may be NULL */ - /* conn ref taken above */ - mxlnd_init_tx_msg(tx, MXLND_MSG_GET_REQ, sizeof(kmx_getreq_msg_t), nid); - txmsg = tx->mxc_msg; - txmsg->mxm_u.get_req.mxgrm_hdr = *hdr; - txmsg->mxm_u.get_req.mxgrm_cookie = tx->mxc_cookie; - tx->mxc_match = mxlnd_create_match(tx, 0); - - mxlnd_queue_tx(tx); - return 0; - - default: - LBUG(); - if (conn) mxlnd_conn_decref(conn); /* drop ref taken above */ - return -EIO; - } - - /* send EAGER */ - - LASSERT (offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[payload_nob]) - <= MXLND_EAGER_SIZE); - - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't send %s to %s: tx descs exhausted\n", - mxlnd_lnetmsg_to_str(type), libcfs_nid2str(nid)); - if (conn) mxlnd_conn_decref(conn); /* drop ref taken above */ - return -ENOMEM; - } - - tx->mxc_peer = peer; - tx->mxc_conn = conn; /* may be NULL */ - /* conn ref taken above */ - nob = offsetof(kmx_eager_msg_t, mxem_payload[payload_nob]); - mxlnd_init_tx_msg (tx, MXLND_MSG_EAGER, nob, nid); - tx->mxc_match = mxlnd_create_match(tx, 0); - - txmsg = tx->mxc_msg; - txmsg->mxm_u.eager.mxem_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(MXLND_EAGER_SIZE, txmsg, - offsetof(kmx_msg_t, mxm_u.eager.mxem_payload), - payload_niov, payload_kiov, payload_offset, payload_nob); - else - lnet_copy_iov2flat(MXLND_EAGER_SIZE, txmsg, - offsetof(kmx_msg_t, mxm_u.eager.mxem_payload), - payload_niov, payload_iov, payload_offset, payload_nob); - - tx->mxc_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - mxlnd_queue_tx(tx); - return 0; -} - -/** - * mxlnd_recv - the LND required recv function - * @ni - * @private - * @lntmsg - * @delayed - * @niov - * @kiov - * @offset - * @mlen - * @rlen - * - * This must not block. 
- */ -int -mxlnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - int ret = 0; - int nob = 0; - int len = 0; - struct kmx_ctx *rx = private; - struct kmx_msg *rxmsg = rx->mxc_msg; - lnet_nid_t nid = rx->mxc_nid; - struct kmx_ctx *tx = NULL; - struct kmx_msg *txmsg = NULL; - struct kmx_peer *peer = rx->mxc_peer; - struct kmx_conn *conn = peer->mxp_conn; - u64 cookie = 0LL; - int msg_type = rxmsg->mxm_type; - int repost = 1; - int credit = 0; - int finalize = 0; - - LASSERT (mlen <= rlen); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - LASSERT (peer != NULL); - - /* conn_addref(conn) already taken for the primary rx */ - - switch (msg_type) { - case MXLND_MSG_EAGER: - nob = offsetof(kmx_msg_t, mxm_u.eager.mxem_payload[rlen]); - len = rx->mxc_status.xfer_length; - if (unlikely(nob > len)) { - CDEBUG(D_NETERROR, "Eager message from %s too big: %d(%d)\n", - libcfs_nid2str(nid), nob, len); - ret = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov(niov, kiov, offset, - MXLND_EAGER_SIZE, rxmsg, - offsetof(kmx_msg_t, mxm_u.eager.mxem_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - MXLND_EAGER_SIZE, rxmsg, - offsetof(kmx_msg_t, mxm_u.eager.mxem_payload), - mlen); - finalize = 1; - credit = 1; - break; - - case MXLND_MSG_PUT_REQ: - /* we are going to reuse the rx, store the needed info */ - cookie = rxmsg->mxm_u.put_req.mxprm_cookie; - - /* get tx, post rx, send PUT_ACK */ - - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't allocate tx for %s\n", libcfs_nid2str(nid)); - /* Not replying will break the connection */ - ret = -ENOMEM; - break; - } - if (unlikely(mlen == 0)) { - finalize = 1; - tx->mxc_peer = peer; - tx->mxc_conn = conn; - mxlnd_send_nak(tx, nid, MXLND_MSG_PUT_ACK, 0, cookie); - /* repost = 1 */ - break; - } - - 
mxlnd_init_tx_msg(tx, MXLND_MSG_PUT_ACK, sizeof(kmx_putack_msg_t), nid); - tx->mxc_peer = peer; - tx->mxc_conn = conn; - mxlnd_conn_addref(conn); /* for the tx */ - txmsg = tx->mxc_msg; - txmsg->mxm_u.put_ack.mxpam_src_cookie = cookie; - txmsg->mxm_u.put_ack.mxpam_dst_cookie = tx->mxc_cookie; - tx->mxc_cookie = cookie; - tx->mxc_match = mxlnd_create_match(tx, 0); - - /* we must post a receive _before_ sending the PUT_ACK */ - mxlnd_ctx_init(rx); - rx->mxc_state = MXLND_CTX_PREP; - rx->mxc_peer = peer; - rx->mxc_conn = conn; - /* do not take another ref for this rx, it is already taken */ - rx->mxc_nid = peer->mxp_nid; - ret = mxlnd_recv_data(ni, lntmsg, rx, MXLND_MSG_PUT_DATA, - txmsg->mxm_u.put_ack.mxpam_dst_cookie); - - if (unlikely(ret != 0)) { - /* Notify peer that it's over */ - CDEBUG(D_NETERROR, "Can't setup PUT_DATA rx for %s: %d\n", - libcfs_nid2str(nid), ret); - mxlnd_ctx_init(tx); - tx->mxc_state = MXLND_CTX_PREP; - tx->mxc_peer = peer; - tx->mxc_conn = conn; - /* finalize = 0, let the PUT_ACK tx finalize this */ - tx->mxc_lntmsg[0] = rx->mxc_lntmsg[0]; - tx->mxc_lntmsg[1] = rx->mxc_lntmsg[1]; - /* conn ref already taken above */ - mxlnd_send_nak(tx, nid, MXLND_MSG_PUT_ACK, ret, cookie); - /* repost = 1 */ - break; - } - - mxlnd_queue_tx(tx); - /* do not return a credit until after PUT_DATA returns */ - repost = 0; - break; - - case MXLND_MSG_GET_REQ: - if (likely(lntmsg != NULL)) { - mxlnd_send_data(ni, lntmsg, rx->mxc_peer, MXLND_MSG_GET_DATA, - rx->mxc_msg->mxm_u.get_req.mxgrm_cookie); - } else { - /* GET didn't match anything */ - /* The initiator has a rx mapped to [k]iov. We cannot send a nak. - * We have to embed the error code in the match bits. 
- * Send the error in bits 52-59 and the cookie in bits 0-51 */ - u64 cookie = rxmsg->mxm_u.get_req.mxgrm_cookie; - - tx = mxlnd_get_idle_tx(); - if (unlikely(tx == NULL)) { - CDEBUG(D_NETERROR, "Can't get tx for GET NAK for %s\n", - libcfs_nid2str(nid)); - ret = -ENOMEM; - break; - } - tx->mxc_msg_type = MXLND_MSG_GET_DATA; - tx->mxc_state = MXLND_CTX_PENDING; - tx->mxc_nid = nid; - tx->mxc_peer = peer; - tx->mxc_conn = conn; - mxlnd_conn_addref(conn); /* for this tx */ - tx->mxc_cookie = cookie; - tx->mxc_match = mxlnd_create_match(tx, ENODATA); - tx->mxc_pin_type = MX_PIN_PHYSICAL; - mxlnd_queue_tx(tx); - } - /* finalize lntmsg after tx completes */ - break; - - default: - LBUG(); - } - - if (repost) { - /* we received a message, increment peer's outstanding credits */ - if (credit == 1) { - spin_lock(&conn->mxk_lock); - conn->mxk_outstanding++; - spin_unlock(&conn->mxk_lock); - } - /* we are done with the rx */ - mxlnd_put_idle_rx(rx); - mxlnd_conn_decref(conn); - } - - if (finalize == 1) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg, 0); - - /* we received a credit, see if we can use it to send a msg */ - if (credit) mxlnd_check_sends(peer); - - return ret; -} - -void -mxlnd_sleep(unsigned long timeout) -{ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(timeout); - return; -} - -/** - * mxlnd_tx_queued - the generic send queue thread - * @arg - thread id (as a void *) - * - * This thread moves send messages from the global tx_queue to the owning - * peer's tx_[msg|data]_queue. If the peer does not exist, it creates one and adds - * it to the global peer list. 
- */ -int -mxlnd_tx_queued(void *arg) -{ - long id = (long) arg; - int ret = 0; - int found = 0; - struct kmx_ctx *tx = NULL; - struct kmx_peer *peer = NULL; - struct list_head *tmp_tx = NULL; - - cfs_daemonize("mxlnd_tx_queued"); - //cfs_block_allsigs(); - - while (!kmxlnd_data.kmx_shutdown) { - ret = down_interruptible(&kmxlnd_data.kmx_tx_queue_sem); - if (kmxlnd_data.kmx_shutdown) - break; - if (ret != 0) // Should we check for -EINTR? - continue; - spin_lock(&kmxlnd_data.kmx_tx_queue_lock); - if (list_empty (&kmxlnd_data.kmx_tx_queue)) { - spin_unlock(&kmxlnd_data.kmx_tx_queue_lock); - continue; - } - tmp_tx = &kmxlnd_data.kmx_tx_queue; - tx = list_entry (tmp_tx->next, struct kmx_ctx, mxc_list); - list_del_init(&tx->mxc_list); - spin_unlock(&kmxlnd_data.kmx_tx_queue_lock); - - found = 0; - peer = mxlnd_find_peer_by_nid(tx->mxc_nid); - if (peer != NULL) { - tx->mxc_peer = peer; - tx->mxc_conn = peer->mxp_conn; - mxlnd_conn_addref(tx->mxc_conn); /* for this tx */ - mxlnd_queue_tx(tx); - found = 1; - } - if (found == 0) { - int hash = 0; - struct kmx_peer *peer = NULL; - struct kmx_peer *old = NULL; - - hash = mxlnd_nid_to_hash(tx->mxc_nid); - - LASSERT(tx->mxc_msg_type != MXLND_MSG_PUT_DATA && - tx->mxc_msg_type != MXLND_MSG_GET_DATA); - /* create peer */ - ret = mxlnd_peer_alloc(&peer, tx->mxc_nid); - if (ret != 0) { - /* finalize message */ - tx->mxc_status.code = -ECONNABORTED; - mxlnd_put_idle_tx(tx); - continue; - } - tx->mxc_peer = peer; - tx->mxc_conn = peer->mxp_conn; - - /* add peer to global peer list, but look to see - * if someone already created it after we released - * the read lock */ - write_lock(&kmxlnd_data.kmx_peers_lock); - list_for_each_entry(old, &kmxlnd_data.kmx_peers[hash], mxp_peers) { - if (old->mxp_nid == peer->mxp_nid) { - /* somebody beat us here, we created a duplicate */ - found = 1; - break; - } - } - - if (found == 0) { - list_add_tail(&peer->mxp_peers, &kmxlnd_data.kmx_peers[hash]); - atomic_inc(&kmxlnd_data.kmx_npeers); - } else 
{ - tx->mxc_peer = old; - tx->mxc_conn = old->mxp_conn; - mxlnd_reduce_idle_rxs(*kmxlnd_tunables.kmx_credits - 1); - mxlnd_peer_decref(peer); - } - mxlnd_conn_addref(tx->mxc_conn); /* for this tx */ - write_unlock(&kmxlnd_data.kmx_peers_lock); - - mxlnd_queue_tx(tx); - } - } - mxlnd_thread_stop(id); - return 0; -} - -/* When calling this, we must not have the peer lock. */ -void -mxlnd_iconnect(struct kmx_peer *peer, u64 mask) -{ - mx_return_t mxret = MX_SUCCESS; - mx_request_t request; - struct kmx_conn *conn = peer->mxp_conn; - - mxlnd_conn_addref(conn); /* hold until CONN_REQ or CONN_ACK completes */ - - LASSERT(mask == MXLND_MASK_ICON_REQ || - mask == MXLND_MASK_ICON_ACK); - - if (peer->mxp_reconnect_time == 0) { - peer->mxp_reconnect_time = jiffies; - } - - if (peer->mxp_nic_id == 0LL) { - mxlnd_peer_hostname_to_nic_id(peer); - if (peer->mxp_nic_id == 0LL) { - /* not mapped yet, return */ - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_INIT; - spin_unlock(&conn->mxk_lock); - if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) { - /* give up and notify LNET */ - mxlnd_conn_disconnect(conn, 0, 1); - mxlnd_conn_alloc(&peer->mxp_conn, peer); - } - mxlnd_conn_decref(conn); - return; - } - } - - mxret = mx_iconnect(kmxlnd_data.kmx_endpt, peer->mxp_nic_id, - peer->mxp_host->mxh_ep_id, MXLND_MSG_MAGIC, mask, - (void *) peer, &request); - if (unlikely(mxret != MX_SUCCESS)) { - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - CDEBUG(D_NETERROR, "mx_iconnect() failed with %s (%d) to %s\n", - mx_strerror(mxret), mxret, libcfs_nid2str(peer->mxp_nid)); - mxlnd_conn_decref(conn); - } - return; -} - -#define MXLND_STATS 0 - -int -mxlnd_check_sends(struct kmx_peer *peer) -{ - int ret = 0; - int found = 0; - mx_return_t mxret = MX_SUCCESS; - struct kmx_ctx *tx = NULL; - struct kmx_conn *conn = NULL; - u8 msg_type = 0; - int credit = 0; - int status = 0; - int ntx_posted = 0; - int credits = 0; 
-#if MXLND_STATS - static unsigned long last = 0; -#endif - - if (unlikely(peer == NULL)) { - LASSERT(peer != NULL); - return -1; - } - conn = peer->mxp_conn; - /* do not add another ref for this tx */ - - if (conn == NULL) { - /* we do not have any conns */ - return -1; - } - -#if MXLND_STATS - if (time_after(jiffies, last)) { - last = jiffies + HZ; - CDEBUG(D_NET, "status= %s credits= %d outstanding= %d ntx_msgs= %d " - "ntx_posted= %d ntx_data= %d data_posted= %d\n", - mxlnd_connstatus_to_str(conn->mxk_status), conn->mxk_credits, - conn->mxk_outstanding, conn->mxk_ntx_msgs, conn->mxk_ntx_posted, - conn->mxk_ntx_data, conn->mxk_data_posted); - } -#endif - - /* cache peer state for asserts */ - spin_lock(&conn->mxk_lock); - ntx_posted = conn->mxk_ntx_posted; - credits = conn->mxk_credits; - spin_unlock(&conn->mxk_lock); - - LASSERT(ntx_posted <= *kmxlnd_tunables.kmx_credits); - LASSERT(ntx_posted >= 0); - - LASSERT(credits <= *kmxlnd_tunables.kmx_credits); - LASSERT(credits >= 0); - - /* check number of queued msgs, ignore data */ - spin_lock(&conn->mxk_lock); - if (conn->mxk_outstanding >= MXLND_CREDIT_HIGHWATER) { - /* check if any txs queued that could return credits... 
*/ - if (list_empty(&conn->mxk_tx_credit_queue) || conn->mxk_ntx_msgs == 0) { - /* if not, send a NOOP */ - tx = mxlnd_get_idle_tx(); - if (likely(tx != NULL)) { - tx->mxc_peer = peer; - tx->mxc_conn = peer->mxp_conn; - mxlnd_conn_addref(conn); /* for this tx */ - mxlnd_init_tx_msg (tx, MXLND_MSG_NOOP, 0, peer->mxp_nid); - tx->mxc_match = mxlnd_create_match(tx, 0); - mxlnd_peer_queue_tx_locked(tx); - found = 1; - goto done_locked; - } - } - } - spin_unlock(&conn->mxk_lock); - - /* if the peer is not ready, try to connect */ - spin_lock(&conn->mxk_lock); - if (unlikely(conn->mxk_status == MXLND_CONN_INIT || - conn->mxk_status == MXLND_CONN_FAIL || - conn->mxk_status == MXLND_CONN_REQ)) { - CDEBUG(D_NET, "status=%s\n", mxlnd_connstatus_to_str(conn->mxk_status)); - conn->mxk_status = MXLND_CONN_WAIT; - spin_unlock(&conn->mxk_lock); - mxlnd_iconnect(peer, MXLND_MASK_ICON_REQ); - goto done; - } - spin_unlock(&conn->mxk_lock); - - spin_lock(&conn->mxk_lock); - while (!list_empty(&conn->mxk_tx_free_queue) || - !list_empty(&conn->mxk_tx_credit_queue)) { - /* We have something to send. If we have a queued tx that does not - * require a credit (free), choose it since its completion will - * return a credit (here or at the peer), complete a DATA or - * CONN_REQ or CONN_ACK. 
*/ - struct list_head *tmp_tx = NULL; - if (!list_empty(&conn->mxk_tx_free_queue)) { - tmp_tx = &conn->mxk_tx_free_queue; - } else { - tmp_tx = &conn->mxk_tx_credit_queue; - } - tx = list_entry(tmp_tx->next, struct kmx_ctx, mxc_list); - - msg_type = tx->mxc_msg_type; - - /* don't try to send a rx */ - LASSERT(tx->mxc_type == MXLND_REQ_TX); - - /* ensure that it is a valid msg type */ - LASSERT(msg_type == MXLND_MSG_CONN_REQ || - msg_type == MXLND_MSG_CONN_ACK || - msg_type == MXLND_MSG_NOOP || - msg_type == MXLND_MSG_EAGER || - msg_type == MXLND_MSG_PUT_REQ || - msg_type == MXLND_MSG_PUT_ACK || - msg_type == MXLND_MSG_PUT_DATA || - msg_type == MXLND_MSG_GET_REQ || - msg_type == MXLND_MSG_GET_DATA); - LASSERT(tx->mxc_peer == peer); - LASSERT(tx->mxc_nid == peer->mxp_nid); - - credit = mxlnd_tx_requires_credit(tx); - if (credit) { - - if (conn->mxk_ntx_posted == *kmxlnd_tunables.kmx_credits) { - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(peer->mxp_nid)); - goto done_locked; - } - - if (conn->mxk_credits == 0) { - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(peer->mxp_nid)); - goto done_locked; - } - - if (conn->mxk_credits == 1 && /* last credit reserved for */ - conn->mxk_outstanding == 0) { /* giving back credits */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(peer->mxp_nid)); - goto done_locked; - } - } - - if (unlikely(conn->mxk_status != MXLND_CONN_READY)) { - if ( ! 
(msg_type == MXLND_MSG_CONN_REQ || - msg_type == MXLND_MSG_CONN_ACK)) { - CDEBUG(D_NET, "peer status is %s for tx 0x%llx (%s)\n", - mxlnd_connstatus_to_str(conn->mxk_status), - tx->mxc_cookie, - mxlnd_msgtype_to_str(tx->mxc_msg_type)); - if (conn->mxk_status == MXLND_CONN_DISCONNECT) { - list_del_init(&tx->mxc_list); - tx->mxc_status.code = -ECONNABORTED; - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); - } - goto done_locked; - } - } - - list_del_init(&tx->mxc_list); - - /* handle credits, etc now while we have the lock to avoid races */ - if (credit) { - conn->mxk_credits--; - conn->mxk_ntx_posted++; - } - if (msg_type != MXLND_MSG_PUT_DATA && - msg_type != MXLND_MSG_GET_DATA) { - if (msg_type != MXLND_MSG_CONN_REQ && - msg_type != MXLND_MSG_CONN_ACK) { - conn->mxk_ntx_msgs--; - } - } - if (tx->mxc_incarnation == 0 && - conn->mxk_incarnation != 0) { - tx->mxc_incarnation = conn->mxk_incarnation; - } - spin_unlock(&conn->mxk_lock); - - /* if this is a NOOP and (1) mxp_conn->mxk_outstanding < CREDIT_HIGHWATER - * or (2) there is a non-DATA msg that can return credits in the - * queue, then drop this duplicate NOOP */ - if (unlikely(msg_type == MXLND_MSG_NOOP)) { - spin_lock(&conn->mxk_lock); - if ((conn->mxk_outstanding < MXLND_CREDIT_HIGHWATER) || - (conn->mxk_ntx_msgs >= 1)) { - conn->mxk_credits++; - conn->mxk_ntx_posted--; - spin_unlock(&conn->mxk_lock); - /* redundant NOOP */ - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_nid2str(peer->mxp_nid)); - found = 1; - goto done; - } - spin_unlock(&conn->mxk_lock); - } - - found = 1; - if (likely((msg_type != MXLND_MSG_PUT_DATA) && - (msg_type != MXLND_MSG_GET_DATA))) { - mxlnd_pack_msg(tx); - } - - //ret = -ECONNABORTED; - mxret = MX_SUCCESS; - - spin_lock(&conn->mxk_lock); - status = conn->mxk_status; - spin_unlock(&conn->mxk_lock); - - if (likely((status == MXLND_CONN_READY) || - (msg_type == MXLND_MSG_CONN_REQ) || - (msg_type == MXLND_MSG_CONN_ACK))) { - ret 
= 0; - if (msg_type != MXLND_MSG_CONN_REQ && - msg_type != MXLND_MSG_CONN_ACK) { - /* add to the pending list */ - ret = mxlnd_q_pending_ctx(tx); - if (ret == -1) { - /* FIXME the conn is disconnected, now what? */ - } - } else { - /* CONN_REQ/ACK */ - tx->mxc_state = MXLND_CTX_PENDING; - } - - if (ret == 0) { - if (likely(msg_type != MXLND_MSG_PUT_DATA && - msg_type != MXLND_MSG_GET_DATA)) { - /* send a msg style tx */ - LASSERT(tx->mxc_nseg == 1); - LASSERT(tx->mxc_pin_type == MX_PIN_PHYSICAL); - CDEBUG(D_NET, "sending %s 0x%llx\n", - mxlnd_msgtype_to_str(msg_type), - tx->mxc_cookie); - mxret = mx_kisend(kmxlnd_data.kmx_endpt, - &tx->mxc_seg, - tx->mxc_nseg, - tx->mxc_pin_type, - conn->mxk_epa, - tx->mxc_match, - (void *) tx, - &tx->mxc_mxreq); - } else { - /* send a DATA tx */ - spin_lock(&conn->mxk_lock); - conn->mxk_ntx_data--; - conn->mxk_data_posted++; - spin_unlock(&conn->mxk_lock); - CDEBUG(D_NET, "sending %s 0x%llx\n", - mxlnd_msgtype_to_str(msg_type), - tx->mxc_cookie); - mxret = mx_kisend(kmxlnd_data.kmx_endpt, - tx->mxc_seg_list, - tx->mxc_nseg, - tx->mxc_pin_type, - conn->mxk_epa, - tx->mxc_match, - (void *) tx, - &tx->mxc_mxreq); - } - } else { - mxret = MX_CONNECTION_FAILED; - } - if (likely(mxret == MX_SUCCESS)) { - ret = 0; - } else { - CDEBUG(D_NETERROR, "mx_kisend() failed with %s (%d) " - "sending to %s\n", mx_strerror(mxret), (int) mxret, - libcfs_nid2str(peer->mxp_nid)); - /* NOTE mx_kisend() only fails if there are not enough - * resources. Do not change the connection status. 
*/ - if (mxret == MX_NO_RESOURCES) { - tx->mxc_status.code = -ENOMEM; - } else { - tx->mxc_status.code = -ECONNABORTED; - } - if (credit) { - spin_lock(&conn->mxk_lock); - conn->mxk_ntx_posted--; - conn->mxk_credits++; - spin_unlock(&conn->mxk_lock); - } else if (msg_type == MXLND_MSG_PUT_DATA || - msg_type == MXLND_MSG_GET_DATA) { - spin_lock(&conn->mxk_lock); - conn->mxk_data_posted--; - spin_unlock(&conn->mxk_lock); - } - if (msg_type != MXLND_MSG_PUT_DATA && - msg_type != MXLND_MSG_GET_DATA && - msg_type != MXLND_MSG_CONN_REQ && - msg_type != MXLND_MSG_CONN_ACK) { - spin_lock(&conn->mxk_lock); - conn->mxk_outstanding += tx->mxc_msg->mxm_credits; - spin_unlock(&conn->mxk_lock); - } - if (msg_type != MXLND_MSG_CONN_REQ && - msg_type != MXLND_MSG_CONN_ACK) { - /* remove from the pending list */ - mxlnd_deq_pending_ctx(tx); - } - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); - } - } - spin_lock(&conn->mxk_lock); - } -done_locked: - spin_unlock(&conn->mxk_lock); -done: - return found; -} - - -/** - * mxlnd_handle_tx_completion - a tx completed, progress or complete the msg - * @ctx - the tx descriptor - * - * Determine which type of send request it was and start the next step, if needed, - * or, if done, signal completion to LNET. After we are done, put back on the - * idle tx list. 
- */ -void -mxlnd_handle_tx_completion(struct kmx_ctx *tx) -{ - int failed = (tx->mxc_status.code != MX_STATUS_SUCCESS); - struct kmx_msg *msg = tx->mxc_msg; - struct kmx_peer *peer = tx->mxc_peer; - struct kmx_conn *conn = tx->mxc_conn; - u8 type = tx->mxc_msg_type; - int credit = mxlnd_tx_requires_credit(tx); - u64 cookie = tx->mxc_cookie; - - CDEBUG(D_NET, "entering %s (0x%llx):\n", - mxlnd_msgtype_to_str(tx->mxc_msg_type), cookie); - - if (unlikely(conn == NULL)) { - mx_get_endpoint_addr_context(tx->mxc_status.source, (void **) &conn); - if (conn != NULL) { - /* do not add a ref for the tx, it was set before sending */ - tx->mxc_conn = conn; - tx->mxc_peer = conn->mxk_peer; - } - } - LASSERT (peer != NULL); - LASSERT (conn != NULL); - - if (type != MXLND_MSG_PUT_DATA && type != MXLND_MSG_GET_DATA) { - LASSERT (type == msg->mxm_type); - } - - if (failed) { - tx->mxc_status.code = -EIO; - } else { - spin_lock(&conn->mxk_lock); - conn->mxk_last_tx = jiffies; - spin_unlock(&conn->mxk_lock); - } - - switch (type) { - - case MXLND_MSG_GET_DATA: - spin_lock(&conn->mxk_lock); - if (conn->mxk_incarnation == tx->mxc_incarnation) { - conn->mxk_outstanding++; - conn->mxk_data_posted--; - } - spin_unlock(&conn->mxk_lock); - break; - - case MXLND_MSG_PUT_DATA: - spin_lock(&conn->mxk_lock); - if (conn->mxk_incarnation == tx->mxc_incarnation) { - conn->mxk_data_posted--; - } - spin_unlock(&conn->mxk_lock); - break; - - case MXLND_MSG_NOOP: - case MXLND_MSG_PUT_REQ: - case MXLND_MSG_PUT_ACK: - case MXLND_MSG_GET_REQ: - case MXLND_MSG_EAGER: - //case MXLND_MSG_NAK: - break; - - case MXLND_MSG_CONN_ACK: - if (peer->mxp_incompatible) { - /* we sent our params, now close this conn */ - mxlnd_conn_disconnect(conn, 0, 1); - } - case MXLND_MSG_CONN_REQ: - if (failed) { - CDEBUG(D_NETERROR, "handle_tx_completion(): %s " - "failed with %s (%d) to %s\n", - type == MXLND_MSG_CONN_REQ ? 
"CONN_REQ" : "CONN_ACK", - mx_strstatus(tx->mxc_status.code), - tx->mxc_status.code, - libcfs_nid2str(tx->mxc_nid)); - if (!peer->mxp_incompatible) { - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - } - } - break; - - default: - CDEBUG(D_NETERROR, "Unknown msg type of %d\n", type); - LBUG(); - } - - if (credit) { - spin_lock(&conn->mxk_lock); - if (conn->mxk_incarnation == tx->mxc_incarnation) { - conn->mxk_ntx_posted--; - } - spin_unlock(&conn->mxk_lock); - } - - CDEBUG(D_NET, "leaving mxlnd_handle_tx_completion()\n"); - mxlnd_put_idle_tx(tx); - mxlnd_conn_decref(conn); - - mxlnd_check_sends(peer); - - return; -} - -void -mxlnd_handle_rx_completion(struct kmx_ctx *rx) -{ - int ret = 0; - int repost = 1; - int credit = 1; - u32 nob = rx->mxc_status.xfer_length; - u64 bits = rx->mxc_status.match_info; - struct kmx_msg *msg = rx->mxc_msg; - struct kmx_peer *peer = rx->mxc_peer; - struct kmx_conn *conn = rx->mxc_conn; - u8 type = rx->mxc_msg_type; - u64 seq = 0LL; - lnet_msg_t *lntmsg[2]; - int result = 0; - u64 nic_id = 0LL; - u32 ep_id = 0; - int decref = 1; - int incompatible = 0; - - /* NOTE We may only know the peer's nid if it is a PUT_REQ, GET_REQ, - * failed GET reply, CONN_REQ, or a CONN_ACK */ - - /* NOTE peer may still be NULL if it is a new peer */ - if (peer == NULL || conn == NULL) { - /* if the peer was disconnected, the peer may exist but - * not have any valid conns */ - decref = 0; /* no peer means no ref was taken for this rx */ - } - - if (conn == NULL && peer != NULL) { - conn = peer->mxp_conn; - rx->mxc_conn = conn; - } - -#if MXLND_DEBUG - CDEBUG(D_NET, "receiving msg bits=0x%llx nob=%d peer=0x%p\n", bits, nob, peer); -#endif - - lntmsg[0] = NULL; - lntmsg[1] = NULL; - - if (rx->mxc_status.code != MX_STATUS_SUCCESS) { - CDEBUG(D_NETERROR, "rx from %s failed with %s (%d)\n", - libcfs_nid2str(rx->mxc_nid), - mx_strstatus(rx->mxc_status.code), - (int) rx->mxc_status.code); - credit = 0; - goto 
cleanup; - } - - if (nob == 0) { - /* this may be a failed GET reply */ - if (type == MXLND_MSG_GET_DATA) { - bits = rx->mxc_status.match_info & 0x0FF0000000000000LL; - ret = (u32) (bits>>52); - lntmsg[0] = rx->mxc_lntmsg[0]; - result = -ret; - goto cleanup; - } else { - /* we had a rx complete with 0 bytes (no hdr, nothing) */ - CDEBUG(D_NETERROR, "rx from %s returned with 0 bytes\n", - libcfs_nid2str(rx->mxc_nid)); - goto cleanup; - } - } - - /* NOTE PUT_DATA and GET_DATA do not have mxc_msg, do not call unpack() */ - if (type == MXLND_MSG_PUT_DATA) { - result = rx->mxc_status.code; - lntmsg[0] = rx->mxc_lntmsg[0]; - goto cleanup; - } else if (type == MXLND_MSG_GET_DATA) { - result = rx->mxc_status.code; - lntmsg[0] = rx->mxc_lntmsg[0]; - lntmsg[1] = rx->mxc_lntmsg[1]; - goto cleanup; - } - - ret = mxlnd_unpack_msg(msg, nob); - if (ret != 0) { - CDEBUG(D_NETERROR, "Error %d unpacking rx from %s\n", - ret, libcfs_nid2str(rx->mxc_nid)); - goto cleanup; - } - rx->mxc_nob = nob; - type = msg->mxm_type; - seq = msg->mxm_seq; - - if (type != MXLND_MSG_CONN_REQ && - (!lnet_ptlcompat_matchnid(rx->mxc_nid, msg->mxm_srcnid) || - !lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid))) { - CDEBUG(D_NETERROR, "rx with mismatched NID (type %s) (my nid is " - "0x%llx and rx msg dst is 0x%llx)\n", - mxlnd_msgtype_to_str(type), kmxlnd_data.kmx_ni->ni_nid, - msg->mxm_dstnid); - goto cleanup; - } - - if (type != MXLND_MSG_CONN_REQ && type != MXLND_MSG_CONN_ACK) { - if ((conn != NULL && msg->mxm_srcstamp != conn->mxk_incarnation) || - msg->mxm_dststamp != kmxlnd_data.kmx_incarnation) { - if (conn != NULL) { - CDEBUG(D_NETERROR, "Stale rx from %s with type %s " - "(mxm_srcstamp (%lld) != mxk_incarnation (%lld) " - "|| mxm_dststamp (%lld) != kmx_incarnation (%lld))\n", - libcfs_nid2str(rx->mxc_nid), mxlnd_msgtype_to_str(type), - msg->mxm_srcstamp, conn->mxk_incarnation, - msg->mxm_dststamp, kmxlnd_data.kmx_incarnation); - } else { - CDEBUG(D_NETERROR, "Stale rx from %s 
with type %s " - "mxm_dststamp (%lld) != kmx_incarnation (%lld))\n", - libcfs_nid2str(rx->mxc_nid), mxlnd_msgtype_to_str(type), - msg->mxm_dststamp, kmxlnd_data.kmx_incarnation); - } - credit = 0; - goto cleanup; - } - } - - CDEBUG(D_NET, "Received %s with %d credits\n", - mxlnd_msgtype_to_str(type), msg->mxm_credits); - - if (msg->mxm_type != MXLND_MSG_CONN_REQ && - msg->mxm_type != MXLND_MSG_CONN_ACK) { - LASSERT(peer != NULL); - LASSERT(conn != NULL); - if (msg->mxm_credits != 0) { - spin_lock(&conn->mxk_lock); - if (msg->mxm_srcstamp == conn->mxk_incarnation) { - if ((conn->mxk_credits + msg->mxm_credits) > - *kmxlnd_tunables.kmx_credits) { - CDEBUG(D_NETERROR, "mxk_credits %d mxm_credits %d\n", - conn->mxk_credits, msg->mxm_credits); - } - conn->mxk_credits += msg->mxm_credits; - LASSERT(conn->mxk_credits >= 0); - LASSERT(conn->mxk_credits <= *kmxlnd_tunables.kmx_credits); - } - spin_unlock(&conn->mxk_lock); - } - } - - CDEBUG(D_NET, "switch %s for rx (0x%llx)\n", mxlnd_msgtype_to_str(type), seq); - switch (type) { - case MXLND_MSG_NOOP: - break; - - case MXLND_MSG_EAGER: - ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.eager.mxem_hdr, - msg->mxm_srcnid, rx, 0); - repost = ret < 0; - break; - - case MXLND_MSG_PUT_REQ: - ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.put_req.mxprm_hdr, - msg->mxm_srcnid, rx, 1); - repost = ret < 0; - break; - - case MXLND_MSG_PUT_ACK: { - u64 cookie = (u64) msg->mxm_u.put_ack.mxpam_dst_cookie; - if (cookie > MXLND_MAX_COOKIE) { - CDEBUG(D_NETERROR, "NAK for msg_type %d from %s\n", rx->mxc_msg_type, - libcfs_nid2str(rx->mxc_nid)); - result = -((cookie >> 52) & 0xff); - lntmsg[0] = rx->mxc_lntmsg[0]; - } else { - mxlnd_send_data(kmxlnd_data.kmx_ni, rx->mxc_lntmsg[0], - rx->mxc_peer, MXLND_MSG_PUT_DATA, - rx->mxc_msg->mxm_u.put_ack.mxpam_dst_cookie); - } - /* repost == 1 */ - break; - } - case MXLND_MSG_GET_REQ: - ret = lnet_parse(kmxlnd_data.kmx_ni, &msg->mxm_u.get_req.mxgrm_hdr, - msg->mxm_srcnid, rx, 1); - repost = ret < 0; 
- break; - - case MXLND_MSG_CONN_REQ: - if (!lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid)) { - CDEBUG(D_NETERROR, "Can't accept %s: bad dst nid %s\n", - libcfs_nid2str(msg->mxm_srcnid), - libcfs_nid2str(msg->mxm_dstnid)); - goto cleanup; - } - if (msg->mxm_u.conn_req.mxcrm_queue_depth != *kmxlnd_tunables.kmx_credits) { - CDEBUG(D_NETERROR, "Can't accept %s: incompatible queue depth " - "%d (%d wanted)\n", - libcfs_nid2str(msg->mxm_srcnid), - msg->mxm_u.conn_req.mxcrm_queue_depth, - *kmxlnd_tunables.kmx_credits); - incompatible = 1; - } - if (msg->mxm_u.conn_req.mxcrm_eager_size != MXLND_EAGER_SIZE) { - CDEBUG(D_NETERROR, "Can't accept %s: incompatible EAGER size " - "%d (%d wanted)\n", - libcfs_nid2str(msg->mxm_srcnid), - msg->mxm_u.conn_req.mxcrm_eager_size, - (int) MXLND_EAGER_SIZE); - incompatible = 1; - } - if (peer == NULL) { - peer = mxlnd_find_peer_by_nid(msg->mxm_srcnid); - if (peer == NULL) { - int hash = 0; - hash = mxlnd_nid_to_hash(msg->mxm_srcnid); - - mx_decompose_endpoint_addr(rx->mxc_status.source, - &nic_id, &ep_id); - rx->mxc_nid = msg->mxm_srcnid; - - ret = mxlnd_peer_alloc(&peer, msg->mxm_srcnid); - if (ret != 0) { - goto cleanup; - } - LASSERT(peer->mxp_host->mxh_ep_id == ep_id); - write_lock(&kmxlnd_data.kmx_peers_lock); - list_add_tail(&peer->mxp_peers, - &kmxlnd_data.kmx_peers[hash]); - write_unlock(&kmxlnd_data.kmx_peers_lock); - atomic_inc(&kmxlnd_data.kmx_npeers); - } else { - ret = mxlnd_conn_alloc(&conn, peer); - if (ret != 0) { - CDEBUG(D_NETERROR, "Cannot allocate mxp_conn\n"); - goto cleanup; - } - } - conn = peer->mxp_conn; - } else { - struct kmx_conn *old_conn = conn; - - /* do not call mx_disconnect() */ - mxlnd_conn_disconnect(old_conn, 0, 0); - - /* the ref for this rx was taken on the old_conn */ - mxlnd_conn_decref(old_conn); - - /* do not decref this conn below */ - decref = 0; - - /* This allocs a conn, points peer->mxp_conn to this one. - * The old conn is still on the peer->mxp_conns list. 
- * As the pending requests complete, they will call - * conn_decref() which will eventually free it. */ - ret = mxlnd_conn_alloc(&conn, peer); - if (ret != 0) { - CDEBUG(D_NETERROR, "Cannot allocate peer->mxp_conn\n"); - goto cleanup; - } - } - spin_lock(&peer->mxp_lock); - peer->mxp_incarnation = msg->mxm_srcstamp; - peer->mxp_incompatible = incompatible; - spin_unlock(&peer->mxp_lock); - spin_lock(&conn->mxk_lock); - conn->mxk_incarnation = msg->mxm_srcstamp; - conn->mxk_status = MXLND_CONN_WAIT; - spin_unlock(&conn->mxk_lock); - - /* handle_conn_ack() will create the CONN_ACK msg */ - mxlnd_iconnect(peer, MXLND_MASK_ICON_ACK); - - break; - - case MXLND_MSG_CONN_ACK: - if (!lnet_ptlcompat_matchnid(kmxlnd_data.kmx_ni->ni_nid, msg->mxm_dstnid)) { - CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: " - "bad dst nid %s\n", libcfs_nid2str(msg->mxm_srcnid), - libcfs_nid2str(msg->mxm_dstnid)); - ret = -1; - goto failed; - } - if (msg->mxm_u.conn_req.mxcrm_queue_depth != *kmxlnd_tunables.kmx_credits) { - CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: " - "incompatible queue depth %d (%d wanted)\n", - libcfs_nid2str(msg->mxm_srcnid), - msg->mxm_u.conn_req.mxcrm_queue_depth, - *kmxlnd_tunables.kmx_credits); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - incompatible = 1; - ret = -1; - } - if (msg->mxm_u.conn_req.mxcrm_eager_size != MXLND_EAGER_SIZE) { - CDEBUG(D_NETERROR, "Can't accept CONN_ACK from %s: " - "incompatible EAGER size %d (%d wanted)\n", - libcfs_nid2str(msg->mxm_srcnid), - msg->mxm_u.conn_req.mxcrm_eager_size, - (int) MXLND_EAGER_SIZE); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - incompatible = 1; - ret = -1; - } - spin_lock(&peer->mxp_lock); - peer->mxp_incarnation = msg->mxm_srcstamp; - peer->mxp_incompatible = incompatible; - spin_unlock(&peer->mxp_lock); - spin_lock(&conn->mxk_lock); - conn->mxk_credits = *kmxlnd_tunables.kmx_credits; - 
conn->mxk_outstanding = 0; - conn->mxk_incarnation = msg->mxm_srcstamp; - conn->mxk_timeout = 0; - if (!incompatible) { - conn->mxk_status = MXLND_CONN_READY; - } - spin_unlock(&conn->mxk_lock); - if (incompatible) mxlnd_conn_disconnect(conn, 0, 1); - break; - - default: - CDEBUG(D_NETERROR, "Bad MXLND message type %x from %s\n", msg->mxm_type, - libcfs_nid2str(rx->mxc_nid)); - ret = -EPROTO; - break; - } - -failed: - if (ret < 0) { - MXLND_PRINT("setting PEER_CONN_FAILED\n"); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - } - -cleanup: - if (conn != NULL) { - spin_lock(&conn->mxk_lock); - conn->mxk_last_rx = cfs_time_current(); /* jiffies */ - spin_unlock(&conn->mxk_lock); - } - - if (repost) { - /* lnet_parse() failed, etc., repost now */ - mxlnd_put_idle_rx(rx); - if (conn != NULL && credit == 1) { - if (type == MXLND_MSG_PUT_DATA) { - spin_lock(&conn->mxk_lock); - conn->mxk_outstanding++; - spin_unlock(&conn->mxk_lock); - } else if (type != MXLND_MSG_GET_DATA && - (type == MXLND_MSG_EAGER || - type == MXLND_MSG_PUT_REQ || - type == MXLND_MSG_NOOP)) { - spin_lock(&conn->mxk_lock); - conn->mxk_outstanding++; - spin_unlock(&conn->mxk_lock); - } - } - if (decref) mxlnd_conn_decref(conn); - } - - if (type == MXLND_MSG_PUT_DATA || type == MXLND_MSG_GET_DATA) { - CDEBUG(D_NET, "leaving for rx (0x%llx)\n", bits); - } else { - CDEBUG(D_NET, "leaving for rx (0x%llx)\n", seq); - } - - if (lntmsg[0] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[0], result); - if (lntmsg[1] != NULL) lnet_finalize(kmxlnd_data.kmx_ni, lntmsg[1], result); - - if (conn != NULL && credit == 1) mxlnd_check_sends(peer); - - return; -} - - - -void -mxlnd_handle_conn_req(struct kmx_peer *peer, mx_status_t status) -{ - struct kmx_ctx *tx = NULL; - struct kmx_msg *txmsg = NULL; - struct kmx_conn *conn = peer->mxp_conn; - - /* a conn ref was taken when calling mx_iconnect(), - * hold it until CONN_REQ or CONN_ACK completes */ - - CDEBUG(D_NET, 
"entering\n"); - if (status.code != MX_STATUS_SUCCESS) { - CDEBUG(D_NETERROR, "mx_iconnect() failed with %s (%d) to %s\n", - mx_strstatus(status.code), status.code, - libcfs_nid2str(peer->mxp_nid)); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - - if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) { - struct kmx_conn *new_conn = NULL; - CDEBUG(D_NETERROR, "timeout, calling conn_disconnect()\n"); - mxlnd_conn_disconnect(conn, 0, 1); - mxlnd_conn_alloc(&new_conn, peer); - spin_lock(&peer->mxp_lock); - peer->mxp_reconnect_time = 0; - spin_unlock(&peer->mxp_lock); - } - - mxlnd_conn_decref(conn); - return; - } - - spin_lock(&conn->mxk_lock); - conn->mxk_epa = status.source; - spin_unlock(&conn->mxk_lock); - mx_set_endpoint_addr_context(conn->mxk_epa, (void *) conn); - - /* mx_iconnect() succeeded, reset delay to 0 */ - spin_lock(&peer->mxp_lock); - peer->mxp_reconnect_time = 0; - spin_unlock(&peer->mxp_lock); - - /* marshal CONN_REQ msg */ - /* we are still using the conn ref from iconnect() - do not take another */ - tx = mxlnd_get_idle_tx(); - if (tx == NULL) { - CDEBUG(D_NETERROR, "Can't allocate CONN_REQ tx for %s\n", - libcfs_nid2str(peer->mxp_nid)); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - mxlnd_conn_decref(conn); - return; - } - - tx->mxc_peer = peer; - tx->mxc_conn = conn; - mxlnd_init_tx_msg (tx, MXLND_MSG_CONN_REQ, sizeof(kmx_connreq_msg_t), peer->mxp_nid); - txmsg = tx->mxc_msg; - txmsg->mxm_u.conn_req.mxcrm_queue_depth = *kmxlnd_tunables.kmx_credits; - txmsg->mxm_u.conn_req.mxcrm_eager_size = MXLND_EAGER_SIZE; - tx->mxc_match = mxlnd_create_match(tx, 0); - - CDEBUG(D_NET, "sending MXLND_MSG_CONN_REQ\n"); - mxlnd_queue_tx(tx); - return; -} - -void -mxlnd_handle_conn_ack(struct kmx_peer *peer, mx_status_t status) -{ - struct kmx_ctx *tx = NULL; - struct kmx_msg *txmsg = NULL; - struct kmx_conn *conn = peer->mxp_conn; - - /* 
a conn ref was taken when calling mx_iconnect(), - * hold it until CONN_REQ or CONN_ACK completes */ - - CDEBUG(D_NET, "entering\n"); - if (status.code != MX_STATUS_SUCCESS) { - struct kmx_conn *conn = peer->mxp_conn; - CDEBUG(D_NETERROR, "mx_iconnect() failed for CONN_ACK with %s (%d) " - "to %s mxp_nid = 0x%llx mxp_nic_id = 0x%0llx mxh_ep_id = %d\n", - mx_strstatus(status.code), status.code, - libcfs_nid2str(peer->mxp_nid), - peer->mxp_nid, - peer->mxp_nic_id, - peer->mxp_host->mxh_ep_id); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - - if (time_after(jiffies, peer->mxp_reconnect_time + MXLND_WAIT_TIMEOUT)) { - struct kmx_conn *new_conn = NULL; - CDEBUG(D_NETERROR, "timeout, calling conn_disconnect()\n"); - mxlnd_conn_disconnect(conn, 0, 1); - mxlnd_conn_alloc(&new_conn, peer); - spin_lock(&peer->mxp_lock); - peer->mxp_reconnect_time = 0; - spin_unlock(&peer->mxp_lock); - } - - mxlnd_conn_decref(conn); - return; - } - spin_lock(&conn->mxk_lock); - conn->mxk_epa = status.source; - if (likely(!peer->mxp_incompatible)) { - conn->mxk_status = MXLND_CONN_READY; - } - spin_unlock(&conn->mxk_lock); - mx_set_endpoint_addr_context(conn->mxk_epa, (void *) conn); - - /* mx_iconnect() succeeded, reset delay to 0 */ - spin_lock(&peer->mxp_lock); - peer->mxp_reconnect_time = 0; - spin_unlock(&peer->mxp_lock); - - /* marshal CONN_ACK msg */ - tx = mxlnd_get_idle_tx(); - if (tx == NULL) { - CDEBUG(D_NETERROR, "Can't allocate CONN_ACK tx for %s\n", - libcfs_nid2str(peer->mxp_nid)); - spin_lock(&conn->mxk_lock); - conn->mxk_status = MXLND_CONN_FAIL; - spin_unlock(&conn->mxk_lock); - mxlnd_conn_decref(conn); - return; - } - - tx->mxc_peer = peer; - tx->mxc_conn = conn; - CDEBUG(D_NET, "sending MXLND_MSG_CONN_ACK\n"); - mxlnd_init_tx_msg (tx, MXLND_MSG_CONN_ACK, sizeof(kmx_connreq_msg_t), peer->mxp_nid); - txmsg = tx->mxc_msg; - txmsg->mxm_u.conn_req.mxcrm_queue_depth = *kmxlnd_tunables.kmx_credits; - 
txmsg->mxm_u.conn_req.mxcrm_eager_size = MXLND_EAGER_SIZE; - tx->mxc_match = mxlnd_create_match(tx, 0); - - mxlnd_queue_tx(tx); - return; -} - -/** - * mxlnd_request_waitd - the MX request completion thread(s) - * @arg - thread id (as a void *) - * - * This thread waits for a MX completion and then completes the request. - * We will create one thread per CPU. - */ -int -mxlnd_request_waitd(void *arg) -{ - long id = (long) arg; - char name[24]; - __u32 result = 0; - mx_return_t mxret = MX_SUCCESS; - mx_status_t status; - struct kmx_ctx *ctx = NULL; - enum kmx_req_state req_type = MXLND_REQ_TX; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; -#if MXLND_POLLING - int count = 0; -#endif - - memset(name, 0, sizeof(name)); - snprintf(name, sizeof(name), "mxlnd_request_waitd_%02ld", id); - cfs_daemonize(name); - //cfs_block_allsigs(); - - memset(&status, 0, sizeof(status)); - - CDEBUG(D_NET, "%s starting\n", name); - - while (!kmxlnd_data.kmx_shutdown) { - mxret = MX_SUCCESS; - result = 0; -#if MXLND_POLLING - if (id == 0 && count++ < *kmxlnd_tunables.kmx_polling) { - mxret = mx_test_any(kmxlnd_data.kmx_endpt, 0LL, 0LL, - &status, &result); - } else { - count = 0; - mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT, - 0LL, 0LL, &status, &result); - } -#else - mxret = mx_wait_any(kmxlnd_data.kmx_endpt, MXLND_WAIT_TIMEOUT, - 0LL, 0LL, &status, &result); -#endif - if (unlikely(kmxlnd_data.kmx_shutdown)) - break; - - if (result != 1) { - /* nothing completed... 
*/ - continue; - } - - if (status.code != MX_STATUS_SUCCESS) { - CDEBUG(D_NETERROR, "wait_any() failed with %s (%d) with " - "match_info 0x%llx and length %d\n", - mx_strstatus(status.code), status.code, - (u64) status.match_info, status.msg_length); - } - - /* This may be a mx_iconnect() request completing, - * check the bit mask for CONN_REQ and CONN_ACK */ - if (status.match_info == MXLND_MASK_ICON_REQ || - status.match_info == MXLND_MASK_ICON_ACK) { - peer = (struct kmx_peer*) status.context; - if (status.match_info == MXLND_MASK_ICON_REQ) { - mxlnd_handle_conn_req(peer, status); - } else { - mxlnd_handle_conn_ack(peer, status); - } - continue; - } - - /* This must be a tx or rx */ - - /* NOTE: if this is a RX from the unexpected callback, it may - * have very little info. If we dropped it in unexpected_recv(), - * it will not have a context. If so, ignore it. */ - ctx = (struct kmx_ctx *) status.context; - if (ctx != NULL) { - - req_type = ctx->mxc_type; - conn = ctx->mxc_conn; /* this may be NULL */ - mxlnd_deq_pending_ctx(ctx); - - /* copy status to ctx->mxc_status */ - memcpy(&ctx->mxc_status, &status, sizeof(status)); - - switch (req_type) { - case MXLND_REQ_TX: - mxlnd_handle_tx_completion(ctx); - break; - case MXLND_REQ_RX: - mxlnd_handle_rx_completion(ctx); - break; - default: - CDEBUG(D_NETERROR, "Unknown ctx type %d\n", req_type); - LBUG(); - break; - } - - /* conn is always set except for the first CONN_REQ rx - * from a new peer */ - if (!(status.code == MX_STATUS_SUCCESS || - status.code == MX_STATUS_TRUNCATED) && - conn != NULL) { - mxlnd_conn_disconnect(conn, 1, 1); - } - } - CDEBUG(D_NET, "waitd() completed task\n"); - } - CDEBUG(D_NET, "%s stopping\n", name); - mxlnd_thread_stop(id); - return 0; -} - - -unsigned long -mxlnd_check_timeouts(unsigned long now) -{ - int i = 0; - int disconnect = 0; - unsigned long next = 0; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; - - read_lock(&kmxlnd_data.kmx_peers_lock); - for (i = 0; i < 
MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) { - - if (unlikely(kmxlnd_data.kmx_shutdown)) - return next; - - conn = peer->mxp_conn; - if (conn == NULL) - continue; - - mxlnd_conn_addref(conn); - spin_lock(&conn->mxk_lock); - - /* if nothing pending (timeout == 0) or - * if conn is already disconnected, - * skip this conn */ - if (conn->mxk_timeout == 0 || - conn->mxk_status == MXLND_CONN_DISCONNECT) { - spin_unlock(&conn->mxk_lock); - mxlnd_conn_decref(conn); - continue; - } - - /* we want to find the timeout that will occur first. - * if it is in the future, we will sleep until then. - * if it is in the past, then we will sleep one - * second and repeat the process. */ - if ((next == 0) || (conn->mxk_timeout < next)) { - next = conn->mxk_timeout; - } - - disconnect = 0; - - if (time_after_eq(now, conn->mxk_timeout)) { - disconnect = 1; - } - spin_unlock(&conn->mxk_lock); - - if (disconnect) { - mxlnd_conn_disconnect(conn, 1, 1); - } - mxlnd_conn_decref(conn); - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - if (next == 0) next = now + MXLND_COMM_TIMEOUT; - - return next; -} - -/** - * mxlnd_timeoutd - enforces timeouts on messages - * @arg - thread id (as a void *) - * - * This thread queries each peer for its earliest timeout. If a peer has timed out, - * it calls mxlnd_conn_disconnect(). - * - * After checking for timeouts, try progressing sends (call check_sends()). 
- */ -int -mxlnd_timeoutd(void *arg) -{ - int i = 0; - long id = (long) arg; - unsigned long now = 0; - unsigned long next = 0; - unsigned long delay = HZ; - struct kmx_peer *peer = NULL; - struct kmx_conn *conn = NULL; - - cfs_daemonize("mxlnd_timeoutd"); - //cfs_block_allsigs(); - - CDEBUG(D_NET, "timeoutd starting\n"); - - while (!kmxlnd_data.kmx_shutdown) { - - now = jiffies; - /* if the next timeout has not arrived, go back to sleep */ - if (time_after(now, next)) { - next = mxlnd_check_timeouts(now); - } - - read_lock(&kmxlnd_data.kmx_peers_lock); - for (i = 0; i < MXLND_HASH_SIZE; i++) { - list_for_each_entry(peer, &kmxlnd_data.kmx_peers[i], mxp_peers) { - conn = peer->mxp_conn; - if (conn == NULL) - continue; - - if (conn->mxk_status != MXLND_CONN_DISCONNECT && - time_after(now, conn->mxk_last_tx + HZ)) { - mxlnd_check_sends(peer); - } - } - } - read_unlock(&kmxlnd_data.kmx_peers_lock); - - mxlnd_sleep(delay); - } - CDEBUG(D_NET, "timeoutd stopping\n"); - mxlnd_thread_stop(id); - return 0; -} diff --git a/lnet/klnds/mxlnd/mxlnd_modparams.c b/lnet/klnds/mxlnd/mxlnd_modparams.c deleted file mode 100644 index 37d77f1730248ac91ba7b6637dc84a23da2eb6d9..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd_modparams.c +++ /dev/null @@ -1,73 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Scott Atchley <atchley at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "mxlnd.h" - -static int n_waitd = MXLND_N_SCHED; -CFS_MODULE_PARM(n_waitd, "i", int, 0444, - "# of completion daemons"); - -static int max_peers = MXLND_MAX_PEERS; -CFS_MODULE_PARM(max_peers, "i", int, 0444, - "maximum number of peers that may connect"); - -static int cksum = MXLND_CKSUM; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not data payload) checksums"); - -static int ntx = MXLND_NTX; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of total tx message descriptors"); - -static int credits = MXLND_MSG_QUEUE_DEPTH; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int board = MXLND_MX_BOARD; -CFS_MODULE_PARM(board, "i", int, 0444, - "index value of the Myrinet board (NIC)"); - -static int ep_id = MXLND_MX_EP_ID; -CFS_MODULE_PARM(ep_id, "i", int, 0444, - "MX endpoint ID"); - -static int polling = MXLND_POLLING; -CFS_MODULE_PARM(polling, "i", int, 0444, - "Use 0 to block (wait). 
A value > 0 will poll that many times before blocking"); - -static char *hosts = NULL; -CFS_MODULE_PARM(hosts, "s", charp, 0444, - "IP-to-hostname resolution file"); - -kmx_tunables_t kmxlnd_tunables = { - .kmx_n_waitd = &n_waitd, - .kmx_max_peers = &max_peers, - .kmx_cksum = &cksum, - .kmx_ntx = &ntx, - .kmx_credits = &credits, - .kmx_board = &board, - .kmx_ep_id = &ep_id, - .kmx_polling = &polling, - .kmx_hosts = &hosts -}; diff --git a/lnet/klnds/mxlnd/mxlnd_wire.h b/lnet/klnds/mxlnd/mxlnd_wire.h deleted file mode 100644 index a929608409e3fc57b18caca380fdb19fb0af5f21..0000000000000000000000000000000000000000 --- a/lnet/klnds/mxlnd/mxlnd_wire.h +++ /dev/null @@ -1,95 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Copyright (C) 2006 Myricom, Inc. - * Author: Scott Atchley <atchley at myri.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -/* - * MXLND wire format - sent in sender's byte order - */ - -typedef struct kmx_connreq_msg -{ - u32 mxcrm_queue_depth; /* per peer max messages in flight */ - u32 mxcrm_eager_size; /* size of preposted eager messages */ -} WIRE_ATTR kmx_connreq_msg_t; - -typedef struct kmx_eager_msg -{ - lnet_hdr_t mxem_hdr; /* lnet header */ - char mxem_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kmx_eager_msg_t; - -typedef struct kmx_putreq_msg -{ - lnet_hdr_t mxprm_hdr; /* lnet header */ - u64 mxprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kmx_putreq_msg_t; - -typedef struct kmx_putack_msg -{ - u64 mxpam_src_cookie; /* reflected completion cookie */ - u64 mxpam_dst_cookie; /* opaque completion cookie */ -} WIRE_ATTR kmx_putack_msg_t; - -typedef struct kmx_getreq_msg -{ - lnet_hdr_t mxgrm_hdr; /* lnet header */ - u64 mxgrm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kmx_getreq_msg_t; - -typedef struct kmx_msg -{ - /* First two fields fixed for all time */ - u32 mxm_magic; /* MXLND message */ - u16 mxm_version; /* version number */ - - u8 mxm_type; /* message type */ - u8 mxm_credits; /* returned credits */ - u32 mxm_nob; /* # of bytes in whole message */ - u32 mxm_cksum; /* checksum (0 == no checksum) */ - u64 mxm_srcnid; /* sender's NID */ - u64 mxm_srcstamp; /* sender's incarnation */ - u64 mxm_dstnid; /* destination's NID */ - u64 mxm_dststamp; /* destination's incarnation */ - u64 mxm_seq; /* sequence number */ - - union { - kmx_connreq_msg_t conn_req; - kmx_eager_msg_t eager; - kmx_putreq_msg_t put_req; - kmx_putack_msg_t put_ack; - kmx_getreq_msg_t get_req; - } WIRE_ATTR mxm_u; -} WIRE_ATTR kmx_msg_t; - -#define MXLND_MSG_MAGIC 0x4d583130 /* unique magic 'MX10' */ -#define MXLND_MSG_VERSION 0x01 - -#define MXLND_MSG_CONN_REQ 0xc /* connection request */ -#define MXLND_MSG_CONN_ACK 0xa /* connection request response */ -#define MXLND_MSG_EAGER 0xe /* eager message */ -#define MXLND_MSG_NOOP 0x1 /* no msg, return credits */ -#define 
MXLND_MSG_PUT_REQ 0x2 /* put request src->sink */ -#define MXLND_MSG_PUT_ACK 0x3 /* put ack src<-sink */ -#define MXLND_MSG_PUT_DATA 0x4 /* put payload src->sink */ -#define MXLND_MSG_GET_REQ 0x5 /* get request sink->src */ -#define MXLND_MSG_GET_DATA 0x6 /* get payload sink<-src */ diff --git a/lnet/klnds/o2iblnd/.cvsignore b/lnet/klnds/o2iblnd/.cvsignore deleted file mode 100644 index 2e9b6f47052e4a9724b08b6336229b01d72676a4..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend -wirecheck diff --git a/lnet/klnds/o2iblnd/Makefile.in b/lnet/klnds/o2iblnd/Makefile.in deleted file mode 100644 index 52a194d19dc611e70d89873358ab0bfe0576f393..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := ko2iblnd -ko2iblnd-objs := o2iblnd.o o2iblnd_cb.o o2iblnd_modparams.o - -EXTRA_POST_CFLAGS := @O2IBCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/o2iblnd/autoMakefile.am b/lnet/klnds/o2iblnd/autoMakefile.am deleted file mode 100644 index 83788fd2ee37a895a37f4d26fc9d988b5bfe8b02..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_O2IBLND -modulenet_DATA = ko2iblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(ko2iblnd-objs:%.o=%.c) o2iblnd.h diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c deleted file mode 100644 index ded32d63e0f36ece3f8bf58de55406a4f59357cc..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ /dev/null @@ -1,1710 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "o2iblnd.h" - -lnd_t the_kiblnd = { - .lnd_type = O2IBLND, - .lnd_startup = kiblnd_startup, - .lnd_shutdown = kiblnd_shutdown, - .lnd_ctl = kiblnd_ctl, - .lnd_send = kiblnd_send, - .lnd_recv = kiblnd_recv, -}; - -kib_data_t kiblnd_data; - -__u32 -kiblnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -void -kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob; -} - -void -kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg, - int credits, lnet_nid_t dstnid, __u64 dststamp) -{ - kib_net_t *net = ni->ni_data; - - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ - msg->ibm_magic = IBLND_MSG_MAGIC; - msg->ibm_version = IBLND_MSG_VERSION; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, dstnid); - msg->ibm_srcstamp = net->ibn_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - - if (*kiblnd_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kiblnd_cksum(msg, msg->ibm_nob); - } -} - -int -kiblnd_unpack_msg(kib_msg_t *msg, int nob) -{ - const int hdr_size = offsetof(kib_msg_t, ibm_u); - __u32 msg_cksum; - int flip; - int msg_nob; -#if !IBLND_MAP_ON_DEMAND - int i; - int n; -#endif - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - if (msg->ibm_magic == IBLND_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBLND_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - if (msg->ibm_version != - (flip ? __swab16(IBLND_MSG_VERSION) : IBLND_MSG_VERSION)) { - CERROR("Bad version: %d\n", msg->ibm_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? 
__swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kiblnd_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - __swab16s(&msg->ibm_version); - CLASSERT (sizeof(msg->ibm_type) == 1); - CLASSERT (sizeof(msg->ibm_credits) == 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBLND_MSG_NOOP: - break; - - case IBLND_MSG_IMMEDIATE: - if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) { - CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])); - return -EPROTO; - } - break; - - case IBLND_MSG_PUT_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) { - CERROR("Short PUT_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putreq))); - return -EPROTO; - } - break; - - case IBLND_MSG_PUT_ACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putack))); - return -EPROTO; - } -#if IBLND_MAP_ON_DEMAND - if (flip) { - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrags); - } - - n = msg->ibm_u.putack.ibpam_rd.rd_nfrags; - if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) { - CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", - n, IBLND_MAX_RDMA_FRAGS); - return 
-EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) { - for (i = 0; i < n; i++) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob); - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr); - } - } -#endif - break; - - case IBLND_MSG_GET_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.get))); - return -EPROTO; - } -#if IBLND_MAP_ON_DEMAND - if (flip) { - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrags); - } - - n = msg->ibm_u.get.ibgm_rd.rd_nfrags; - if (n <= 0 || n > IBLND_MAX_RDMA_FRAGS) { - CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", - n, IBLND_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) - for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrags; i++) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob); - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr); - } -#endif - break; - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) { - CERROR("Short RDMA completion: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.completion))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; - - case IBLND_MSG_CONNREQ: - case IBLND_MSG_CONNACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) { - CERROR("Short connreq/ack: %d(%d)\n", 
msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.connparams))); - return -EPROTO; - } - if (flip) { - __swab16s(&msg->ibm_u.connparams.ibcp_queue_depth); - __swab16s(&msg->ibm_u.connparams.ibcp_max_frags); - __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); - } - break; - } - return 0; -} - -int -kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid) -{ - kib_peer_t *peer; - kib_net_t *net = ni->ni_data; - unsigned long flags; - - LASSERT (net != NULL); - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, sizeof(*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->ibp_ni = ni; - peer->ibp_nid = nid; - peer->ibp_error = 0; - peer->ibp_last_alive = cfs_time_current(); - atomic_set(&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD(&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD(&peer->ibp_conns); - INIT_LIST_HEAD(&peer->ibp_tx_queue); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - /* always called with a ref on ni, which prevents ni being shutdown */ - LASSERT (net->ibn_shutdown == 0); - - /* npeers only grows with the global lock held */ - atomic_inc(&net->ibn_npeers); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - *peerp = peer; - return 0; -} - -void -kiblnd_destroy_peer (kib_peer_t *peer) -{ - kib_net_t *net = peer->ibp_ni->ni_data; - - LASSERT (net != NULL); - LASSERT (atomic_read(&peer->ibp_refcount) == 0); - LASSERT (!kiblnd_peer_active(peer)); - LASSERT (peer->ibp_connecting == 0); - LASSERT (peer->ibp_accepting == 0); - LASSERT (list_empty(&peer->ibp_conns)); - LASSERT (list_empty(&peer->ibp_tx_queue)); - - LIBCFS_FREE(peer, sizeof(*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. 
*/ - atomic_dec(&net->ibn_npeers); -} - -void -kiblnd_destroy_dev (kib_dev_t *dev) -{ - LASSERT (dev->ibd_nnets == 0); - - if (!list_empty(&dev->ibd_list)) /* on kib_devs? */ - list_del_init(&dev->ibd_list); - - if (dev->ibd_mr != NULL) - ib_dereg_mr(dev->ibd_mr); - - if (dev->ibd_pd != NULL) - ib_dealloc_pd(dev->ibd_pd); - - if (dev->ibd_cmid != NULL) - rdma_destroy_id(dev->ibd_cmid); - - LIBCFS_FREE(dev, sizeof(*dev)); -} - -kib_peer_t * -kiblnd_find_peer_locked (lnet_nid_t nid) -{ - /* the caller is responsible for accounting the additional reference - * that this creates */ - struct list_head *peer_list = kiblnd_nid2peerlist(nid); - struct list_head *tmp; - kib_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry(tmp, kib_peer_t, ibp_list); - - LASSERT (peer->ibp_connecting > 0 || /* creating conns */ - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); /* active conn */ - - if (peer->ibp_nid != nid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_nid2str(nid), - atomic_read(&peer->ibp_refcount)); - return peer; - } - return NULL; -} - -void -kiblnd_unlink_peer_locked (kib_peer_t *peer) -{ - LASSERT (list_empty(&peer->ibp_conns)); - - LASSERT (kiblnd_peer_active(peer)); - list_del_init(&peer->ibp_list); - /* lose peerlist's ref */ - kiblnd_peer_decref(peer); -} - -int -kiblnd_get_peer_info (lnet_ni_t *ni, int index, - lnet_nid_t *nidp, int *count) -{ - kib_peer_t *peer; - struct list_head *ptmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - - list_for_each (ptmp, &kiblnd_data.kib_peers[i]) { - - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); - - if (peer->ibp_ni != ni) - continue; - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *count = atomic_read(&peer->ibp_refcount); - - 
read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return 0; - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return -ENOENT; -} - -void -kiblnd_del_peer_locked (kib_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kib_conn_t *conn; - - if (list_empty(&peer->ibp_conns)) { - kiblnd_unlink_peer_locked(peer); - } else { - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - kiblnd_close_conn_locked(conn, 0); - } - /* NB closing peer's last conn unlinked it. */ - } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. */ -} - -int -kiblnd_del_peer (lnet_ni_t *ni, lnet_nid_t nid) -{ - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - kib_peer_t *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) { - lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - } else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); - - if (peer->ibp_ni != ni) - continue; - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT (list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, &zombies); - } - - kiblnd_del_peer_locked(peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_txlist_done(ni, &zombies, -EIO); - - return rc; -} - -kib_conn_t * -kiblnd_get_conn_by_idx (lnet_ni_t *ni, int index) -{ - kib_peer_t *peer; - struct list_head *ptmp; - kib_conn_t *conn; - struct list_head *ctmp; - 
int i; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - list_for_each (ptmp, &kiblnd_data.kib_peers[i]) { - - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); - - if (peer->ibp_ni != ni) - continue; - - list_for_each (ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, kib_conn_t, ibc_list); - kiblnd_conn_addref(conn); - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return conn; - } - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return NULL; -} - -void -kiblnd_debug_rx (kib_rx_t *rx) -{ - CDEBUG(D_CONSOLE, " %p status %d msg_type %x cred %d\n", - rx, rx->rx_status, rx->rx_msg->ibm_type, - rx->rx_msg->ibm_credits); -} - -void -kiblnd_debug_tx (kib_tx_t *tx) -{ - CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lx " - "cookie "LPX64" msg %s%s type %x cred %d\n", - tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting, - tx->tx_status, tx->tx_deadline, tx->tx_cookie, - tx->tx_lntmsg[0] == NULL ? "-" : "!", - tx->tx_lntmsg[1] == NULL ? 
"-" : "!", - tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits); -} - -void -kiblnd_debug_conn (kib_conn_t *conn) -{ - struct list_head *tmp; - int i; - - spin_lock(&conn->ibc_lock); - - CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n", - atomic_read(&conn->ibc_refcount), conn, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - CDEBUG(D_CONSOLE, " state %d nposted %d cred %d o_cred %d r_cred %d\n", - conn->ibc_state, conn->ibc_nsends_posted, conn->ibc_credits, - conn->ibc_outstanding_credits, conn->ibc_reserved_credits); - CDEBUG(D_CONSOLE, " comms_err %d\n", conn->ibc_comms_error); - - CDEBUG(D_CONSOLE, " early_rxs:\n"); - list_for_each(tmp, &conn->ibc_early_rxs) - kiblnd_debug_rx(list_entry(tmp, kib_rx_t, rx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_nocred:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_nocred) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_rsrvd) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue:\n"); - list_for_each(tmp, &conn->ibc_tx_queue) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " active_txs:\n"); - list_for_each(tmp, &conn->ibc_active_txs) - kiblnd_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " rxs:\n"); - for (i = 0; i < IBLND_RX_MSGS; i++) - kiblnd_debug_rx(&conn->ibc_rxs[i]); - - spin_unlock(&conn->ibc_lock); -} - -kib_conn_t * -kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid, int state) -{ - /* CAVEAT EMPTOR: - * If the new conn is created successfully it takes over the caller's - * ref on 'peer'. It also "owns" 'cmid' and destroys it when it itself - * is destroyed. On failure, the caller's ref on 'peer' remains and - * she must dispose of 'cmid'. (Actually I'd block forever if I tried - * to destroy 'cmid' here since I'm called from the CM which still has - * its ref on 'cmid'). 
*/ - kib_conn_t *conn; - kib_net_t *net = peer->ibp_ni->ni_data; - int i; - int page_offset; - int ipage; - int rc; - struct ib_cq *cq; - struct ib_qp_init_attr *init_qp_attr; - unsigned long flags; - - LASSERT (net != NULL); - LASSERT (!in_interrupt()); - - LIBCFS_ALLOC(init_qp_attr, sizeof(*init_qp_attr)); - if (init_qp_attr == NULL) { - CERROR("Can't allocate qp_attr for %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed_0; - } - - LIBCFS_ALLOC(conn, sizeof(*conn)); - if (conn == NULL) { - CERROR("Can't allocate connection for %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed_1; - } - - memset(conn, 0, sizeof(*conn)); /* zero flags, NULL pointers etc... */ - - conn->ibc_state = IBLND_CONN_INIT; - conn->ibc_peer = peer; /* I take the caller's ref */ - cmid->context = conn; /* for future CM callbacks */ - conn->ibc_cmid = cmid; - - INIT_LIST_HEAD(&conn->ibc_early_rxs); - INIT_LIST_HEAD(&conn->ibc_tx_queue); - INIT_LIST_HEAD(&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD(&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD(&conn->ibc_active_txs); - spin_lock_init(&conn->ibc_lock); - - LIBCFS_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - if (conn->ibc_connvars == NULL) { - CERROR("Can't allocate in-progress connection state\n"); - goto failed_2; - } - memset(conn->ibc_connvars, 0, sizeof(*conn->ibc_connvars)); - - LIBCFS_ALLOC(conn->ibc_rxs, IBLND_RX_MSGS * sizeof(kib_rx_t)); - if (conn->ibc_rxs == NULL) { - CERROR("Cannot allocate RX buffers\n"); - goto failed_2; - } - memset(conn->ibc_rxs, 0, IBLND_RX_MSGS * sizeof(kib_rx_t)); - - rc = kiblnd_alloc_pages(&conn->ibc_rx_pages, IBLND_RX_MSG_PAGES); - if (rc != 0) - goto failed_2; - - for (i = ipage = page_offset = 0; i < IBLND_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; - kib_rx_t *rx = &conn->ibc_rxs[i]; - - rx->rx_conn = conn; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - rx->rx_msgaddr = dma_map_single(cmid->device->dma_device, - 
rx->rx_msg, - IBLND_MSG_SIZE, - DMA_FROM_DEVICE); - pci_unmap_addr_set(rx, rx_msgunmap, rx->rx_msgaddr); - - CDEBUG(D_NET,"rx %d: %p "LPX64"("LPX64")\n", - i, rx->rx_msg, rx->rx_msgaddr, - lnet_page2phys(page) + page_offset); - - page_offset += IBLND_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBLND_RX_MSG_PAGES); - } - } - - cq = ib_create_cq(cmid->device, - kiblnd_cq_completion, kiblnd_cq_event, conn, - IBLND_CQ_ENTRIES()); - if (!IS_ERR(cq)) { - conn->ibc_cq = cq; - } else { - CERROR("Can't create CQ: %ld\n", PTR_ERR(cq)); - goto failed_2; - } - - rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - if (rc != 0) { - CERROR("Can't request completion notificiation: %d\n", rc); - goto failed_2; - } - - memset(init_qp_attr, 0, sizeof(*init_qp_attr)); - init_qp_attr->event_handler = kiblnd_qp_event; - init_qp_attr->qp_context = conn; - init_qp_attr->cap.max_send_wr = (*kiblnd_tunables.kib_concurrent_sends) * - (1 + IBLND_MAX_RDMA_FRAGS); - init_qp_attr->cap.max_recv_wr = IBLND_RX_MSGS; - init_qp_attr->cap.max_send_sge = 1; - init_qp_attr->cap.max_recv_sge = 1; - init_qp_attr->sq_sig_type = IB_SIGNAL_REQ_WR; - init_qp_attr->qp_type = IB_QPT_RC; - init_qp_attr->send_cq = cq; - init_qp_attr->recv_cq = cq; - - rc = 0; - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - switch (*kiblnd_tunables.kib_ib_mtu) { - default: - rc = *kiblnd_tunables.kib_ib_mtu; - /* fall through to... */ - case 0: /* set tunable to the default - * CAVEAT EMPTOR! 
this assumes the default is one of the MTUs - * below, otherwise we'll WARN on the next QP create */ - *kiblnd_tunables.kib_ib_mtu = - ib_mtu_enum_to_int(cmid->route.path_rec->mtu); - break; - case 256: - cmid->route.path_rec->mtu = IB_MTU_256; - break; - case 512: - cmid->route.path_rec->mtu = IB_MTU_512; - break; - case 1024: - cmid->route.path_rec->mtu = IB_MTU_1024; - break; - case 2048: - cmid->route.path_rec->mtu = IB_MTU_2048; - break; - case 4096: - cmid->route.path_rec->mtu = IB_MTU_4096; - break; - } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (rc != 0) - CWARN("Invalid IB MTU value %d, using default value %d\n", - rc, *kiblnd_tunables.kib_ib_mtu); - - rc = rdma_create_qp(cmid, net->ibn_dev->ibd_pd, init_qp_attr); - if (rc != 0) { - CERROR("Can't create QP: %d\n", rc); - goto failed_2; - } - - LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); - - /* 1 ref for caller and each rxmsg */ - atomic_set(&conn->ibc_refcount, 1 + IBLND_RX_MSGS); - conn->ibc_nrx = IBLND_RX_MSGS; - - /* post receives */ - for (i = 0; i < IBLND_RX_MSGS; i++) { - rc = kiblnd_post_rx(&conn->ibc_rxs[i], - IBLND_POSTRX_NO_CREDIT); - if (rc != 0) { - CERROR("Can't post rxmsg: %d\n", rc); - - /* Make posted receives complete */ - kiblnd_abort_receives(conn); - - /* correct # of posted buffers - * NB locking needed now I'm racing with completion */ - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - conn->ibc_nrx -= IBLND_RX_MSGS - i; - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, - flags); - - /* Drop my own and unused rxbuffer refcounts */ - while (i++ <= IBLND_RX_MSGS) - kiblnd_conn_decref(conn); - - return NULL; - } - } - - /* Init successful! 
*/ - LASSERT (state == IBLND_CONN_ACTIVE_CONNECT || - state == IBLND_CONN_PASSIVE_WAIT); - conn->ibc_state = state; - - /* 1 more conn */ - atomic_inc(&net->ibn_nconns); - return conn; - - failed_2: - kiblnd_destroy_conn(conn); - failed_1: - LIBCFS_FREE(init_qp_attr, sizeof(*init_qp_attr)); - failed_0: - return NULL; -} - -void -kiblnd_destroy_conn (kib_conn_t *conn) -{ - struct rdma_cm_id *cmid = conn->ibc_cmid; - kib_peer_t *peer = conn->ibc_peer; - int rc; - int i; - - LASSERT (!in_interrupt()); - LASSERT (atomic_read(&conn->ibc_refcount) == 0); - LASSERT (list_empty(&conn->ibc_early_rxs)); - LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT (list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT (list_empty(&conn->ibc_active_txs)); - LASSERT (conn->ibc_nsends_posted == 0); - - switch (conn->ibc_state) { - default: - /* conn must be completely disengaged from the network */ - LBUG(); - - case IBLND_CONN_DISCONNECTED: - /* connvars should have been freed already */ - LASSERT (conn->ibc_connvars == NULL); - break; - - case IBLND_CONN_INIT: - break; - } - - if (conn->ibc_cmid->qp != NULL) - rdma_destroy_qp(conn->ibc_cmid); - - if (conn->ibc_cq != NULL) { - rc = ib_destroy_cq(conn->ibc_cq); - if (rc != 0) - CWARN("Error destroying CQ: %d\n", rc); - } - - if (conn->ibc_rx_pages != NULL) { - LASSERT (conn->ibc_rxs != NULL); - - for (i = 0; i < IBLND_RX_MSGS; i++) { - kib_rx_t *rx = &conn->ibc_rxs[i]; - - LASSERT (rx->rx_nob >= 0); /* not posted */ - - dma_unmap_single(conn->ibc_cmid->device->dma_device, - pci_unmap_addr(rx, rx_msgunmap), - IBLND_MSG_SIZE, DMA_FROM_DEVICE); - } - - kiblnd_free_pages(conn->ibc_rx_pages); - } - - if (conn->ibc_rxs != NULL) { - LIBCFS_FREE(conn->ibc_rxs, - IBLND_RX_MSGS * sizeof(kib_rx_t)); - } - - if (conn->ibc_connvars != NULL) - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - - /* See CAVEAT EMPTOR above in kiblnd_create_conn */ - if (conn->ibc_state != IBLND_CONN_INIT) 
{ - kib_net_t *net = peer->ibp_ni->ni_data; - - kiblnd_peer_decref(peer); - rdma_destroy_id(cmid); - atomic_dec(&net->ibn_nconns); - } - - LIBCFS_FREE(conn, sizeof(*conn)); -} - -int -kiblnd_close_peer_conns_locked (kib_peer_t *peer, int why) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - count++; - kiblnd_close_conn_locked(conn, why); - } - - return count; -} - -int -kiblnd_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - if (conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_incarnation, incarnation); - - count++; - kiblnd_close_conn_locked(conn, -ESTALE); - } - - return count; -} - -int -kiblnd_close_matching_conns (lnet_ni_t *ni, lnet_nid_t nid) -{ - kib_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - unsigned long flags; - int count = 0; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kiblnd_nid2peerlist(nid) - kiblnd_data.kib_peers; - else { - lo = 0; - hi = kiblnd_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kiblnd_data.kib_peers[i]) { - - peer = list_entry(ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_connecting > 0 || - peer->ibp_accepting > 0 || - !list_empty(&peer->ibp_conns)); - - if (peer->ibp_ni != ni) - continue; - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kiblnd_close_peer_conns_locked(peer, 0); - } - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - 
/* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return 0; - - return (count == 0) ? -ENOENT : 0; -} - -int -kiblnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - int count = 0; - - rc = kiblnd_get_peer_info(ni, data->ioc_count, - &nid, &count); - data->ioc_nid = nid; - data->ioc_count = count; - break; - } - - case IOC_LIBCFS_DEL_PEER: { - rc = kiblnd_del_peer(ni, data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kib_conn_t *conn = kiblnd_get_conn_by_idx(ni, data->ioc_count); - - if (conn == NULL) { - rc = -ENOENT; - } else { - // kiblnd_debug_conn(conn); - rc = 0; - data->ioc_nid = conn->ibc_peer->ibp_nid; - kiblnd_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kiblnd_close_matching_conns(ni, data->ioc_nid); - break; - } - - default: - break; - } - - return rc; -} - -void -kiblnd_free_pages (kib_pages_t *p) -{ - int npages = p->ibp_npages; - int i; - - for (i = 0; i < npages; i++) - if (p->ibp_pages[i] != NULL) - __free_page(p->ibp_pages[i]); - - LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); -} - -int -kiblnd_alloc_pages (kib_pages_t **pp, int npages) -{ - kib_pages_t *p; - int i; - - LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages])); - if (p == NULL) { - CERROR("Can't allocate descriptor for %d pages\n", npages); - return -ENOMEM; - } - - memset(p, 0, offsetof(kib_pages_t, ibp_pages[npages])); - p->ibp_npages = npages; - - for (i = 0; i < npages; i++) { - p->ibp_pages[i] = alloc_page(GFP_KERNEL); - if (p->ibp_pages[i] == NULL) { - CERROR("Can't allocate page %d of %d\n", i, npages); - kiblnd_free_pages(p); - return -ENOMEM; - } - } - - *pp = p; - return 0; -} - -void -kiblnd_free_tx_descs (lnet_ni_t *ni) -{ - int i; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - - if (net->ibn_tx_descs != NULL) { - for (i = 0; i < IBLND_TX_MSGS(); i++) { - 
kib_tx_t *tx = &net->ibn_tx_descs[i]; - -#if IBLND_MAP_ON_DEMAND - if (tx->tx_pages != NULL) - LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); -#else - if (tx->tx_wrq != NULL) - LIBCFS_FREE(tx->tx_wrq, - (1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - - if (tx->tx_sge != NULL) - LIBCFS_FREE(tx->tx_sge, - (1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_sge)); - - if (tx->tx_rd != NULL) - LIBCFS_FREE(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBLND_MAX_RDMA_FRAGS])); - - if (tx->tx_frags != NULL) - LIBCFS_FREE(tx->tx_frags, - IBLND_MAX_RDMA_FRAGS * - sizeof(*tx->tx_frags)); -#endif - } - - LIBCFS_FREE(net->ibn_tx_descs, - IBLND_TX_MSGS() * sizeof(kib_tx_t)); - } - - if (net->ibn_tx_pages != NULL) - kiblnd_free_pages(net->ibn_tx_pages); -} - -int -kiblnd_alloc_tx_descs (lnet_ni_t *ni) -{ - int i; - int rc; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - - rc = kiblnd_alloc_pages(&net->ibn_tx_pages, IBLND_TX_MSG_PAGES()); - - if (rc != 0) { - CERROR("Can't allocate tx pages\n"); - return rc; - } - - LIBCFS_ALLOC (net->ibn_tx_descs, - IBLND_TX_MSGS() * sizeof(kib_tx_t)); - if (net->ibn_tx_descs == NULL) { - CERROR("Can't allocate %d tx descriptors\n", IBLND_TX_MSGS()); - return -ENOMEM; - } - - memset(net->ibn_tx_descs, 0, - IBLND_TX_MSGS() * sizeof(kib_tx_t)); - - for (i = 0; i < IBLND_TX_MSGS(); i++) { - kib_tx_t *tx = &net->ibn_tx_descs[i]; - -#if IBLND_MAP_ON_DEMAND - LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); - if (tx->tx_pages == NULL) { - CERROR("Can't allocate phys page vector[%d]\n", - LNET_MAX_IOV); - return -ENOMEM; - } -#else - LIBCFS_ALLOC(tx->tx_wrq, - (1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - if (tx->tx_wrq == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_sge, - (1 + IBLND_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_sge)); - if (tx->tx_sge == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBLND_MAX_RDMA_FRAGS])); - if (tx->tx_rd == NULL) 
- return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_frags, - IBLND_MAX_RDMA_FRAGS * - sizeof(*tx->tx_frags)); - if (tx->tx_frags == NULL) - return -ENOMEM; -#endif - } - - return 0; -} - -void -kiblnd_unmap_tx_descs (lnet_ni_t *ni) -{ - int i; - kib_tx_t *tx; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - - for (i = 0; i < IBLND_TX_MSGS(); i++) { - tx = &net->ibn_tx_descs[i]; - - dma_unmap_single(net->ibn_dev->ibd_cmid->device->dma_device, - pci_unmap_addr(tx, tx_msgunmap), - IBLND_MSG_SIZE, DMA_TO_DEVICE); - } -} - -void -kiblnd_map_tx_descs (lnet_ni_t *ni) -{ - int ipage = 0; - int page_offset = 0; - int i; - struct page *page; - kib_tx_t *tx; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - - /* pre-mapped messages are not bigger than 1 page */ - CLASSERT (IBLND_MSG_SIZE <= PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer calculations */ - CLASSERT (PAGE_SIZE % IBLND_MSG_SIZE == 0); - - for (i = 0; i < IBLND_TX_MSGS(); i++) { - page = net->ibn_tx_pages->ibp_pages[ipage]; - tx = &net->ibn_tx_descs[i]; - - tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - tx->tx_msgaddr = dma_map_single( - net->ibn_dev->ibd_cmid->device->dma_device, - tx->tx_msg, IBLND_MSG_SIZE, DMA_TO_DEVICE); - pci_unmap_addr_set(tx, tx_msgunmap, tx->tx_msgaddr); - - list_add(&tx->tx_list, &net->ibn_idle_txs); - - page_offset += IBLND_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBLND_TX_MSG_PAGES()); - } - } -} - -void -kiblnd_base_shutdown (void) -{ - int i; - - LASSERT (list_empty(&kiblnd_data.kib_devs)); - - CDEBUG(D_MALLOC, "before LND base cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - switch (kiblnd_data.kib_init) { - default: - LBUG(); - - case IBLND_INIT_ALL: - case IBLND_INIT_DATA: - LASSERT (kiblnd_data.kib_peers != NULL); - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) { - LASSERT (list_empty(&kiblnd_data.kib_peers[i])); - 
} - LASSERT (list_empty(&kiblnd_data.kib_connd_zombies)); - LASSERT (list_empty(&kiblnd_data.kib_connd_conns)); - - /* flag threads to terminate; wake and wait for them to die */ - kiblnd_data.kib_shutdown = 1; - wake_up_all(&kiblnd_data.kib_sched_waitq); - wake_up_all(&kiblnd_data.kib_connd_waitq); - - i = 2; - while (atomic_read(&kiblnd_data.kib_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "Waiting for %d threads to terminate\n", - atomic_read(&kiblnd_data.kib_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - /* fall through */ - - case IBLND_INIT_NOTHING: - break; - } - - if (kiblnd_data.kib_peers != NULL) - LIBCFS_FREE(kiblnd_data.kib_peers, - sizeof(struct list_head) * - kiblnd_data.kib_peer_hash_size); - - CDEBUG(D_MALLOC, "after LND base cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - kiblnd_data.kib_init = IBLND_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -void -kiblnd_shutdown (lnet_ni_t *ni) -{ - kib_net_t *net = ni->ni_data; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - int i; - unsigned long flags; - - LASSERT(kiblnd_data.kib_init == IBLND_INIT_ALL); - - if (net == NULL) - goto out; - - CDEBUG(D_MALLOC, "before LND net cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - write_lock_irqsave(g_lock, flags); - net->ibn_shutdown = 1; - write_unlock_irqrestore(g_lock, flags); - - switch (net->ibn_init) { - default: - LBUG(); - - case IBLND_INIT_ALL: - /* nuke all existing peers within this net */ - kiblnd_del_peer(ni, LNET_NID_ANY); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read(&net->ibn_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? 
*/ - "%s: waiting for %d peers to disconnect\n", - libcfs_nid2str(ni->ni_nid), - atomic_read(&net->ibn_npeers)); - cfs_pause(cfs_time_seconds(1)); - } - - kiblnd_unmap_tx_descs(ni); - - LASSERT (net->ibn_dev->ibd_nnets > 0); - net->ibn_dev->ibd_nnets--; - - /* fall through */ - - case IBLND_INIT_NOTHING: - LASSERT (atomic_read(&net->ibn_nconns) == 0); - -#if IBLND_MAP_ON_DEMAND - if (net->ibn_fmrpool != NULL) - ib_destroy_fmr_pool(net->ibn_fmrpool); -#endif - if (net->ibn_dev != NULL && - net->ibn_dev->ibd_nnets == 0) - kiblnd_destroy_dev(net->ibn_dev); - - break; - } - - kiblnd_free_tx_descs(ni); - - CDEBUG(D_MALLOC, "after LND net cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - net->ibn_init = IBLND_INIT_NOTHING; - ni->ni_data = NULL; - - LIBCFS_FREE(net, sizeof(*net)); - -out: - if (list_empty(&kiblnd_data.kib_devs)) - kiblnd_base_shutdown(); - return; -} - -int -kiblnd_base_startup (void) -{ - int rc; - int i; - - LASSERT (kiblnd_data.kib_init == IBLND_INIT_NOTHING); - - if (*kiblnd_tunables.kib_credits > *kiblnd_tunables.kib_ntx) { - CERROR("Can't set credits(%d) > ntx(%d)\n", - *kiblnd_tunables.kib_credits, - *kiblnd_tunables.kib_ntx); - return -EINVAL; - } - - PORTAL_MODULE_USE; - memset(&kiblnd_data, 0, sizeof(kiblnd_data)); /* zero pointers, flags etc */ - - rwlock_init(&kiblnd_data.kib_global_lock); - - INIT_LIST_HEAD(&kiblnd_data.kib_devs); - - kiblnd_data.kib_peer_hash_size = IBLND_PEER_HASH_SIZE; - LIBCFS_ALLOC(kiblnd_data.kib_peers, - sizeof(struct list_head) * kiblnd_data.kib_peer_hash_size); - if (kiblnd_data.kib_peers == NULL) { - goto failed; - } - for (i = 0; i < kiblnd_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kiblnd_data.kib_peers[i]); - - spin_lock_init(&kiblnd_data.kib_connd_lock); - INIT_LIST_HEAD(&kiblnd_data.kib_connd_conns); - INIT_LIST_HEAD(&kiblnd_data.kib_connd_zombies); - init_waitqueue_head(&kiblnd_data.kib_connd_waitq); - - spin_lock_init(&kiblnd_data.kib_sched_lock); - INIT_LIST_HEAD(&kiblnd_data.kib_sched_conns); - 
init_waitqueue_head(&kiblnd_data.kib_sched_waitq); - - kiblnd_data.kib_error_qpa.qp_state = IB_QPS_ERR; - - /* lists/ptrs/locks initialised */ - kiblnd_data.kib_init = IBLND_INIT_DATA; - /*****************************************************/ - - for (i = 0; i < IBLND_N_SCHED; i++) { - rc = kiblnd_thread_start(kiblnd_scheduler, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn o2iblnd scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - rc = kiblnd_thread_start(kiblnd_connd, NULL); - if (rc != 0) { - CERROR("Can't spawn o2iblnd connd: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kiblnd_data.kib_init = IBLND_INIT_ALL; - /*****************************************************/ - - return 0; - - failed: - kiblnd_base_shutdown(); - return -ENETDOWN; -} - -int -kiblnd_startup (lnet_ni_t *ni) -{ - char *ifname; - kib_net_t *net; - kib_dev_t *ibdev; - struct list_head *tmp; - struct timeval tv; - int rc; - - LASSERT (ni->ni_lnd == &the_kiblnd); - - if (kiblnd_data.kib_init == IBLND_INIT_NOTHING) { - rc = kiblnd_base_startup(); - if (rc != 0) - return rc; - } - - LIBCFS_ALLOC(net, sizeof(*net)); - ni->ni_data = net; - if (net == NULL) - goto failed; - - memset(net, 0, sizeof(*net)); - - do_gettimeofday(&tv); - net->ibn_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - ni->ni_maxtxcredits = *kiblnd_tunables.kib_credits; - ni->ni_peertxcredits = *kiblnd_tunables.kib_peercredits; - - spin_lock_init(&net->ibn_tx_lock); - INIT_LIST_HEAD(&net->ibn_idle_txs); - - rc = kiblnd_alloc_tx_descs(ni); - if (rc != 0) { - CERROR("Can't allocate tx descs\n"); - goto failed; - } - - if (ni->ni_interfaces[0] != NULL) { - /* Use the IPoIB interface specified in 'networks=' */ - - CLASSERT (LNET_MAX_INTERFACES > 1); - if (ni->ni_interfaces[1] != NULL) { - CERROR("Multiple interfaces not supported\n"); - goto failed; - } - - ifname = ni->ni_interfaces[0]; - } else { - ifname = *kiblnd_tunables.kib_default_ipif; - } - - if (strlen(ifname) >= 
sizeof(ibdev->ibd_ifname)) { - CERROR("IPoIB interface name too long: %s\n", ifname); - goto failed; - } - - ibdev = NULL; - list_for_each (tmp, &kiblnd_data.kib_devs) { - ibdev = list_entry(tmp, kib_dev_t, ibd_list); - - if (!strcmp(&ibdev->ibd_ifname[0], ifname)) - break; - - ibdev = NULL; - } - - if (ibdev == NULL) { - __u32 ip; - __u32 netmask; - int up; - struct rdma_cm_id *id; - struct ib_pd *pd; - struct ib_mr *mr; - struct sockaddr_in addr; - - rc = libcfs_ipif_query(ifname, &up, &ip, &netmask); - if (rc != 0) { - CERROR("Can't query IPoIB interface %s: %d\n", - ifname, rc); - goto failed; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", - ifname); - goto failed; - } - - LIBCFS_ALLOC(ibdev, sizeof(*ibdev)); - if (ibdev == NULL) - goto failed; - - memset(ibdev, 0, sizeof(*ibdev)); - - INIT_LIST_HEAD(&ibdev->ibd_list); /* not yet in kib_devs */ - ibdev->ibd_ifip = ip; - strcpy(&ibdev->ibd_ifname[0], ifname); - - id = rdma_create_id(kiblnd_cm_callback, ibdev, RDMA_PS_TCP); - if (!IS_ERR(id)) { - ibdev->ibd_cmid = id; - } else { - CERROR("Can't create listen ID: %ld\n", PTR_ERR(id)); - goto failed; - } - - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(*kiblnd_tunables.kib_service); - addr.sin_addr.s_addr = htonl(ip); - - rc = rdma_bind_addr(id, (struct sockaddr *)&addr); - if (rc != 0) { - CERROR("Can't bind to %s: %d\n", ifname, rc); - goto failed; - } - - /* Binding should have assigned me an IB device */ - LASSERT (id->device != NULL); - - pd = ib_alloc_pd(id->device); - if (!IS_ERR(pd)) { - ibdev->ibd_pd = pd; - } else { - CERROR("Can't allocate PD: %ld\n", PTR_ERR(pd)); - goto failed; - } - -#if IBLND_MAP_ON_DEMAND - /* MR for sends and receives */ - mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); -#else - /* MR for sends, recieves _and_ RDMA...........v */ - mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE); -#endif - if (!IS_ERR(mr)) { - ibdev->ibd_mr = mr; - } else { 
- CERROR("Can't get MR: %ld\n", PTR_ERR(pd)); - goto failed; - } - - rc = rdma_listen(id, 0); - if (rc != 0) { - CERROR("Can't start listener: %d\n", rc); - goto failed; - } - - list_add_tail(&ibdev->ibd_list, - &kiblnd_data.kib_devs); - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ibdev->ibd_ifip); - net->ibn_dev = ibdev; - -#if IBLND_MAP_ON_DEMAND - /* FMR pool for RDMA */ - { - struct ib_fmr_pool *fmrpool; - struct ib_fmr_pool_param param = { - .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE, - .page_shift = PAGE_SHIFT, - .access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE), - .pool_size = *kiblnd_tunables.kib_fmr_pool_size, - .dirty_watermark = *kiblnd_tunables.kib_fmr_flush_trigger, - .flush_function = NULL, - .flush_arg = NULL, - .cache = *kiblnd_tunables.kib_fmr_cache}; - - if (*kiblnd_tunables.kib_fmr_pool_size < - *kiblnd_tunables.kib_ntx) { - CERROR("Can't set fmr pool size (%d) < ntx(%d)\n", - *kiblnd_tunables.kib_fmr_pool_size, - *kiblnd_tunables.kib_ntx); - goto failed; - } - - fmrpool = ib_create_fmr_pool(ibdev->ibd_pd, ¶m); - if (!IS_ERR(fmrpool)) { - net->ibn_fmrpool = fmrpool; - } else { - CERROR("Can't create FMR pool: %ld\n", - PTR_ERR(fmrpool)); - goto failed; - } - } -#endif - - kiblnd_map_tx_descs(ni); - - ibdev->ibd_nnets++; - net->ibn_init = IBLND_INIT_ALL; - - return 0; - -failed: - kiblnd_shutdown(ni); - - CDEBUG(D_NET, "kiblnd_startup failed\n"); - return -ENETDOWN; -} - -void __exit -kiblnd_module_fini (void) -{ - lnet_unregister_lnd(&the_kiblnd); - kiblnd_tunables_fini(); -} - -int __init -kiblnd_module_init (void) -{ - int rc; - - CLASSERT (sizeof(kib_msg_t) <= IBLND_MSG_SIZE); -#if !IBLND_MAP_ON_DEMAND - CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - <= IBLND_MSG_SIZE); - CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBLND_MAX_RDMA_FRAGS]) - <= IBLND_MSG_SIZE); -#endif - rc = kiblnd_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kiblnd); - - 
return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel OpenIB gen2 LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kiblnd_module_init); -module_exit(kiblnd_module_fini); diff --git a/lnet/klnds/o2iblnd/o2iblnd.h b/lnet/klnds/o2iblnd/o2iblnd.h deleted file mode 100644 index 8afba88f771a1bba967b5787a924049ac9641244..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/o2iblnd.h +++ /dev/null @@ -1,630 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <linux/random.h> - -#include <net/sock.h> -#include <linux/in.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -#if !HAVE_GFP_T -typedef int gfp_t; -#endif - -#include <rdma/rdma_cm.h> -#include <rdma/ib_cm.h> -#include <rdma/ib_verbs.h> -#include <rdma/ib_fmr_pool.h> - -/* tunables fixed at compile time */ -#ifdef CONFIG_SMP -# define IBLND_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define IBLND_N_SCHED 1 /* # schedulers */ -#endif - -#define IBLND_PEER_HASH_SIZE 101 /* # peer lists */ -#define IBLND_RESCHED 100 /* # scheduler loops before reschedule */ -#define IBLND_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ -#define IBLND_CREDIT_HIGHWATER 7 /* when eagerly to return credits */ -#define IBLND_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ - -#define IBLND_MAP_ON_DEMAND 0 -#if IBLND_MAP_ON_DEMAND -# define IBLND_MAX_RDMA_FRAGS 1 -#else -# define IBLND_MAX_RDMA_FRAGS LNET_MAX_IOV -#endif - -/************************/ -/* derived constants... 
*/ - -/* TX messages (shared by all connections) */ -#define IBLND_TX_MSGS() (*kiblnd_tunables.kib_ntx) -#define IBLND_TX_MSG_BYTES() (IBLND_TX_MSGS() * IBLND_MSG_SIZE) -#define IBLND_TX_MSG_PAGES() ((IBLND_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE) - -/* RX messages (per connection) */ -#define IBLND_RX_MSGS (IBLND_MSG_QUEUE_SIZE*2) -#define IBLND_RX_MSG_BYTES (IBLND_RX_MSGS * IBLND_MSG_SIZE) -#define IBLND_RX_MSG_PAGES ((IBLND_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) - -#define IBLND_CQ_ENTRIES() (IBLND_RX_MSGS + \ - (*kiblnd_tunables.kib_concurrent_sends) * \ - (1 + IBLND_MAX_RDMA_FRAGS)) - -typedef struct -{ - unsigned int *kib_service; /* IB service number */ - int *kib_min_reconnect_interval; /* first failed connection retry... */ - int *kib_max_reconnect_interval; /* ...exponentially increasing to this */ - int *kib_cksum; /* checksum kib_msg_t? */ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_keepalive; /* keepalive timeout (seconds) */ - int *kib_ntx; /* # tx descs */ - int *kib_credits; /* # concurrent sends */ - int *kib_peercredits; /* # concurrent sends to 1 peer */ - char **kib_default_ipif; /* default IPoIB interface */ - int *kib_retry_count; - int *kib_rnr_retry_count; - int *kib_concurrent_sends; /* send work queue sizing */ - int *kib_ib_mtu; /* IB MTU */ -#if IBLND_MAP_ON_DEMAND - int *kib_fmr_pool_size; /* # FMRs in pool */ - int *kib_fmr_flush_trigger; /* When to trigger FMR flush */ - int *kib_fmr_cache; /* enable FMR pool cache? 
*/ -#endif -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *kib_sysctl; /* sysctl interface */ -#endif -} kib_tunables_t; - -typedef struct -{ - int ibp_npages; /* # pages */ - struct page *ibp_pages[0]; -} kib_pages_t; - -typedef struct -{ - struct list_head ibd_list; /* chain on kib_devs */ - __u32 ibd_ifip; /* IPoIB interface IP */ - char ibd_ifname[32]; /* IPoIB interface name */ - int ibd_nnets; /* # nets extant */ - - struct rdma_cm_id *ibd_cmid; /* IB listener (bound to 1 device) */ - struct ib_pd *ibd_pd; /* PD for the device */ - struct ib_mr *ibd_mr; /* MR for non RDMA I/O */ -} kib_dev_t; - -typedef struct -{ - __u64 ibn_incarnation; /* my epoch */ - int ibn_init; /* initialisation state */ - int ibn_shutdown; /* shutting down? */ - - atomic_t ibn_npeers; /* # peers extant */ - atomic_t ibn_nconns; /* # connections extant */ - - struct kib_tx *ibn_tx_descs; /* all the tx descriptors */ - kib_pages_t *ibn_tx_pages; /* premapped tx msg pages */ - struct list_head ibn_idle_txs; /* idle tx descriptors */ - spinlock_t ibn_tx_lock; /* serialise */ - -#if IBLND_MAP_ON_DEMAND - struct ib_fmr_pool *ibn_fmrpool; /* FMR pool for RDMA I/O */ -#endif - - kib_dev_t *ibn_dev; /* underlying IB device */ -} kib_net_t; - -typedef struct -{ - int kib_init; /* initialisation state */ - int kib_shutdown; /* shut down? 
*/ - struct list_head kib_devs; /* IB devices extant */ - atomic_t kib_nthreads; /* # live threads */ - rwlock_t kib_global_lock; /* stabilize net/dev/peer/conn ops */ - - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - - void *kib_connd; /* the connd task (serialisation assertions) */ - struct list_head kib_connd_conns; /* connections to setup/teardown */ - struct list_head kib_connd_zombies; /* connections with zero refcount */ - wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */ - spinlock_t kib_connd_lock; /* serialise */ - - wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ - struct list_head kib_sched_conns; /* conns to check for rx completions */ - spinlock_t kib_sched_lock; /* serialise */ - - __u64 kib_next_tx_cookie; /* RDMA completion cookie */ - struct ib_qp_attr kib_error_qpa; /* QP->ERROR */ -} kib_data_t; - -#define IBLND_INIT_NOTHING 0 -#define IBLND_INIT_DATA 1 -#define IBLND_INIT_ALL 2 - -/************************************************************************ - * IB Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). - */ - -typedef struct kib_connparams -{ - __u16 ibcp_queue_depth; - __u16 ibcp_max_frags; - __u32 ibcp_max_msg_size; -} WIRE_ATTR kib_connparams_t; - -typedef struct -{ - lnet_hdr_t ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kib_immediate_msg_t; - -#if IBLND_MAP_ON_DEMAND -typedef struct -{ - __u64 rd_addr; /* IO VMA address */ - __u32 rd_nob; /* # of bytes */ - __u32 rd_key; /* remote key */ -} WIRE_ATTR kib_rdma_desc_t; -#else -typedef struct -{ - __u32 rf_nob; /* # bytes this frag */ - __u64 rf_addr; /* CAVEAT EMPTOR: misaligned!! 
*/ -} WIRE_ATTR kib_rdma_frag_t; - -typedef struct -{ - __u32 rd_key; /* local/remote key */ - __u32 rd_nfrags; /* # fragments */ - kib_rdma_frag_t rd_frags[0]; /* buffer frags */ -} WIRE_ATTR kib_rdma_desc_t; -#endif - -typedef struct -{ - lnet_hdr_t ibprm_hdr; /* portals header */ - __u64 ibprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kib_putreq_msg_t; - -typedef struct -{ - __u64 ibpam_src_cookie; /* reflected completion cookie */ - __u64 ibpam_dst_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */ -} WIRE_ATTR kib_putack_msg_t; - -typedef struct -{ - lnet_hdr_t ibgm_hdr; /* portals header */ - __u64 ibgm_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibgm_rd; /* rdma descriptor */ -} WIRE_ATTR kib_get_msg_t; - -typedef struct -{ - __u64 ibcm_cookie; /* opaque completion cookie */ - __s32 ibcm_status; /* < 0 failure: >= 0 length */ -} WIRE_ATTR kib_completion_msg_t; - -typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an openibnal message */ - __u16 ibm_version; /* this is my version number */ - - __u8 ibm_type; /* msg type */ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - - union { - kib_connparams_t connparams; - kib_immediate_msg_t immediate; - kib_putreq_msg_t putreq; - kib_putack_msg_t putack; - kib_get_msg_t get; - kib_completion_msg_t completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR kib_msg_t; - -#define IBLND_MSG_MAGIC LNET_PROTO_IB_MAGIC /* unique magic */ - -#define IBLND_MSG_VERSION 0x11 - -#define IBLND_MSG_CONNREQ 0xc0 /* connection request */ -#define IBLND_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBLND_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define 
IBLND_MSG_IMMEDIATE 0xd1 /* immediate */ -#define IBLND_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */ -#define IBLND_MSG_PUT_NAK 0xd3 /* completion (sink->src) */ -#define IBLND_MSG_PUT_ACK 0xd4 /* putack (sink->src) */ -#define IBLND_MSG_PUT_DONE 0xd5 /* completion (src->sink) */ -#define IBLND_MSG_GET_REQ 0xd6 /* getreq (sink->src) */ -#define IBLND_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */ - -typedef struct { - __u32 ibr_magic; /* sender's magic */ - __u16 ibr_version; /* sender's version */ - __u8 ibr_why; /* reject reason */ -} WIRE_ATTR kib_rej_t; - - -/* connection rejection reasons */ -#define IBLND_REJECT_CONN_RACE 1 /* You lost connection race */ -#define IBLND_REJECT_NO_RESOURCES 2 /* Out of memory/conns etc */ -#define IBLND_REJECT_FATAL 3 /* Anything else */ - -/***********************************************************************/ - -typedef struct kib_rx /* receive message */ -{ - struct list_head rx_list; /* queue for attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - enum ib_wc_status rx_status; /* completion status */ - kib_msg_t *rx_msg; /* message buffer (host vaddr) */ - __u64 rx_msgaddr; /* message buffer (I/O addr) */ - DECLARE_PCI_UNMAP_ADDR (rx_msgunmap); /* for dma_unmap_single() */ - struct ib_recv_wr rx_wrq; /* receive work item... */ - struct ib_sge rx_sge; /* ...and its memory */ -} kib_rx_t; - -#define IBLND_POSTRX_DONT_POST 0 /* don't post */ -#define IBLND_POSTRX_NO_CREDIT 1 /* post: no credits */ -#define IBLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */ -#define IBLND_POSTRX_RSRVD_CREDIT 3 /* post: give myself back 1 reserved credit */ - -typedef struct kib_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. 
*/ - struct kib_conn *tx_conn; /* owning conn */ - int tx_sending; /* # tx callbacks outstanding */ - int tx_queued; /* queued for sending */ - int tx_waiting; /* waiting for peer */ - int tx_status; /* LNET completion status */ - unsigned long tx_deadline; /* completion deadline */ - __u64 tx_cookie; /* completion cookie */ - lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */ - kib_msg_t *tx_msg; /* message buffer (host vaddr) */ - __u64 tx_msgaddr; /* message buffer (I/O addr) */ - DECLARE_PCI_UNMAP_ADDR (tx_msgunmap); /* for dma_unmap_single() */ - int tx_nwrq; /* # send work items */ -#if IBLND_MAP_ON_DEMAND - struct ib_send_wr tx_wrq[2]; /* send work items... */ - struct ib_sge tx_sge[2]; /* ...and their memory */ - kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */ - __u64 *tx_pages; /* rdma phys page addrs */ - struct ib_pool_fmr *tx_fmr; /* rdma mapping (mapped if != NULL) */ -#else - struct ib_send_wr *tx_wrq; /* send work items... */ - struct ib_sge *tx_sge; /* ...and their memory */ - kib_rdma_desc_t *tx_rd; /* rdma descriptor */ - int tx_nfrags; /* # entries in... 
*/ - struct scatterlist *tx_frags; /* dma_map_sg descriptor */ - int tx_dmadir; /* dma direction */ -#endif -} kib_tx_t; - -typedef struct kib_connvars -{ - /* connection-in-progress variables */ - kib_msg_t cv_msg; -} kib_connvars_t; - -typedef struct kib_conn -{ - struct kib_peer *ibc_peer; /* owning peer */ - struct list_head ibc_list; /* stash on peer's conn list */ - struct list_head ibc_sched_list; /* schedule for attention */ - __u64 ibc_incarnation; /* which instance of the peer */ - atomic_t ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits;/* # ACK/DONE msg credits */ - int ibc_comms_error; /* set on comms error */ - int ibc_nrx:8; /* receive buffers owned */ - int ibc_scheduled:1; /* scheduled for attention */ - int ibc_ready:1; /* CQ callback fired */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */ - struct list_head ibc_tx_queue; /* sends that need a credit */ - struct list_head ibc_tx_queue_nocred;/* sends that don't need a credit */ - struct list_head ibc_tx_queue_rsrvd; /* sends that need to reserve an ACK/DONE msg */ - struct list_head ibc_active_txs; /* active tx awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - kib_rx_t *ibc_rxs; /* the rx descs */ - kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ - - struct rdma_cm_id *ibc_cmid; /* CM id */ - struct ib_cq *ibc_cq; /* completion queue */ - - kib_connvars_t *ibc_connvars; /* in-progress connection state */ -} kib_conn_t; - -#define IBLND_CONN_INIT 0 /* being intialised */ -#define IBLND_CONN_ACTIVE_CONNECT 1 /* active sending req */ -#define IBLND_CONN_PASSIVE_WAIT 2 /* passive waiting for rtu */ -#define IBLND_CONN_ESTABLISHED 3 /* connection established */ -#define IBLND_CONN_CLOSING 4 /* being closed */ 
-#define IBLND_CONN_DISCONNECTED 5 /* disconnected */ - -typedef struct kib_peer -{ - struct list_head ibp_list; /* stash on global peer list */ - lnet_nid_t ibp_nid; /* who's on the other end(s) */ - lnet_ni_t *ibp_ni; /* LNet interface */ - atomic_t ibp_refcount; /* # users */ - struct list_head ibp_conns; /* all active connections */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - int ibp_connecting; /* current active connection attempts */ - int ibp_accepting; /* current passive connection attempts */ - int ibp_error; /* errno on closing this peer */ - cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */ -} kib_peer_t; - - -extern kib_data_t kiblnd_data; -extern kib_tunables_t kiblnd_tunables; - -#define kiblnd_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define kiblnd_conn_decref(conn) \ -do { \ - unsigned long flags; \ - \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kiblnd_data.kib_connd_zombies); \ - wake_up(&kiblnd_data.kib_connd_waitq); \ - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); \ - } \ -} while (0) - -#define kiblnd_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kiblnd_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - 
LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kiblnd_destroy_peer(peer); \ -} while (0) - -static inline struct list_head * -kiblnd_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kiblnd_data.kib_peer_hash_size; - - return (&kiblnd_data.kib_peers [hash]); -} - -static inline int -kiblnd_peer_active (kib_peer_t *peer) -{ - /* Am I in the peer hash table? */ - return (!list_empty(&peer->ibp_list)); -} - -static inline kib_conn_t * -kiblnd_get_conn_locked (kib_peer_t *peer) -{ - LASSERT (!list_empty(&peer->ibp_conns)); - - /* just return the first connection */ - return list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list); -} - -static inline int -kiblnd_send_keepalive(kib_conn_t *conn) -{ - return (*kiblnd_tunables.kib_keepalive > 0) && - time_after(jiffies, conn->ibc_last_send + - *kiblnd_tunables.kib_keepalive*HZ); -} - -static inline void -kiblnd_abort_receives(kib_conn_t *conn) -{ - ib_modify_qp(conn->ibc_cmid->qp, - &kiblnd_data.kib_error_qpa, IB_QP_STATE); -} - -/* CAVEAT EMPTOR: We rely on descriptor alignment to allow us to use the - * lowest bits of the work request id to stash the work item type. 
*/ - -#define IBLND_WID_TX 0 -#define IBLND_WID_RDMA 1 -#define IBLND_WID_RX 2 -#define IBLND_WID_MASK 3UL - -static inline __u64 -kiblnd_ptr2wreqid (void *ptr, int type) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT ((lptr & IBLND_WID_MASK) == 0); - LASSERT ((type & ~IBLND_WID_MASK) == 0); - return (__u64)(lptr | type); -} - -static inline void * -kiblnd_wreqid2ptr (__u64 wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~IBLND_WID_MASK); -} - -static inline int -kiblnd_wreqid2type (__u64 wreqid) -{ - return (wreqid & IBLND_WID_MASK); -} - -static inline void -kiblnd_set_conn_state (kib_conn_t *conn, int state) -{ - conn->ibc_state = state; - mb(); -} - -#if IBLND_MAP_ON_DEMAND -static inline int -kiblnd_rd_size (kib_rdma_desc_t *rd) -{ - return rd->rd_nob; -} -#else -static inline int -kiblnd_rd_size (kib_rdma_desc_t *rd) -{ - int i; - int size; - - for (i = size = 0; i < rd->rd_nfrags; i++) - size += rd->rd_frags[i].rf_nob; - - return size; -} -#endif - -int kiblnd_startup (lnet_ni_t *ni); -void kiblnd_shutdown (lnet_ni_t *ni); -int kiblnd_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg); - -int kiblnd_tunables_init(void); -void kiblnd_tunables_fini(void); - -int kiblnd_connd (void *arg); -int kiblnd_scheduler(void *arg); -int kiblnd_thread_start (int (*fn)(void *arg), void *arg); - -int kiblnd_alloc_pages (kib_pages_t **pp, int npages); -void kiblnd_free_pages (kib_pages_t *p); - -int kiblnd_cm_callback(struct rdma_cm_id *cmid, - struct rdma_cm_event *event); - -int kiblnd_create_peer (lnet_ni_t *ni, kib_peer_t **peerp, lnet_nid_t nid); -void kiblnd_destroy_peer (kib_peer_t *peer); -void kiblnd_destroy_dev (kib_dev_t *dev); -void kiblnd_unlink_peer_locked (kib_peer_t *peer); -void kiblnd_peer_alive (kib_peer_t *peer); -kib_peer_t *kiblnd_find_peer_locked (lnet_nid_t nid); -void kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error); -int kiblnd_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation); - -void 
kiblnd_connreq_done(kib_conn_t *conn, int status); -kib_conn_t *kiblnd_create_conn (kib_peer_t *peer, struct rdma_cm_id *cmid, - int state); -void kiblnd_destroy_conn (kib_conn_t *conn); -void kiblnd_close_conn (kib_conn_t *conn, int error); -void kiblnd_close_conn_locked (kib_conn_t *conn, int error); - -int kiblnd_init_rdma (lnet_ni_t *ni, kib_tx_t *tx, int type, - int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie); - -void kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn); -void kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn); -void kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob); -void kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int status); -void kiblnd_check_sends (kib_conn_t *conn); - -void kiblnd_qp_event(struct ib_event *event, void *arg); -void kiblnd_cq_event(struct ib_event *event, void *arg); -void kiblnd_cq_completion(struct ib_cq *cq, void *arg); - -void kiblnd_init_msg (kib_msg_t *msg, int type, int body_nob); -void kiblnd_pack_msg (lnet_ni_t *ni, kib_msg_t *msg, - int credits, lnet_nid_t dstnid, __u64 dststamp); -int kiblnd_unpack_msg(kib_msg_t *msg, int nob); -int kiblnd_post_rx (kib_rx_t *rx, int credit); - -int kiblnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kiblnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); - - - diff --git a/lnet/klnds/o2iblnd/o2iblnd_cb.c b/lnet/klnds/o2iblnd/o2iblnd_cb.c deleted file mode 100644 index fbc98cb745255974d33035e859e9394348e48d10..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ /dev/null @@ -1,3159 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "o2iblnd.h" - -char * -kiblnd_msgtype2str(int type) -{ - switch (type) { - case IBLND_MSG_CONNREQ: - return "CONNREQ"; - - case IBLND_MSG_CONNACK: - return "CONNACK"; - - case IBLND_MSG_NOOP: - return "NOOP"; - - case IBLND_MSG_IMMEDIATE: - return "IMMEDIATE"; - - case IBLND_MSG_PUT_REQ: - return "PUT_REQ"; - - case IBLND_MSG_PUT_NAK: - return "PUT_NAK"; - - case IBLND_MSG_PUT_ACK: - return "PUT_ACK"; - - case IBLND_MSG_PUT_DONE: - return "PUT_DONE"; - - case IBLND_MSG_GET_REQ: - return "GET_REQ"; - - case IBLND_MSG_GET_DONE: - return "GET_DONE"; - - default: - return "???"; - } -} - -void -kiblnd_tx_done (lnet_ni_t *ni, kib_tx_t *tx) -{ - lnet_msg_t *lntmsg[2]; - kib_net_t *net = ni->ni_data; - int rc; - int i; - - LASSERT (net != NULL); - LASSERT (!in_interrupt()); - LASSERT (!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */ - LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */ - -#if IBLND_MAP_ON_DEMAND - if (tx->tx_fmr != NULL) { - rc = ib_fmr_pool_unmap(tx->tx_fmr); - LASSERT (rc == 0); - - if (tx->tx_status != 0) { - rc = ib_flush_fmr_pool(net->ibn_fmrpool); - LASSERT (rc == 0); - } - - tx->tx_fmr = NULL; - } -#else - if (tx->tx_nfrags != 0) { - dma_unmap_sg(net->ibn_dev->ibd_cmid->device->dma_device, - tx->tx_frags, 
tx->tx_nfrags, tx->tx_dmadir); - tx->tx_nfrags = 0; - } -#endif - /* tx may have up to 2 lnet msgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - rc = tx->tx_status; - - if (tx->tx_conn != NULL) { - LASSERT (ni == tx->tx_conn->ibc_peer->ibp_ni); - - kiblnd_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nwrq = 0; - tx->tx_status = 0; - - spin_lock(&net->ibn_tx_lock); - - list_add(&tx->tx_list, &net->ibn_idle_txs); - - spin_unlock(&net->ibn_tx_lock); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (lntmsg[i] == NULL) - continue; - - lnet_finalize(ni, lntmsg[i], rc); - } -} - -void -kiblnd_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int status) -{ - kib_tx_t *tx; - - while (!list_empty (txlist)) { - tx = list_entry (txlist->next, kib_tx_t, tx_list); - - list_del (&tx->tx_list); - /* complete now */ - tx->tx_waiting = 0; - tx->tx_status = status; - kiblnd_tx_done(ni, tx); - } -} - -kib_tx_t * -kiblnd_get_idle_tx (lnet_ni_t *ni) -{ - kib_net_t *net = ni->ni_data; - kib_tx_t *tx; - - LASSERT (net != NULL); - - spin_lock(&net->ibn_tx_lock); - - if (list_empty(&net->ibn_idle_txs)) { - spin_unlock(&net->ibn_tx_lock); - return NULL; - } - - tx = list_entry(net->ibn_idle_txs.next, kib_tx_t, tx_list); - list_del(&tx->tx_list); - - /* Allocate a new completion cookie. It might not be needed, - * but we've got a lock right now and we're unlikely to - * wrap... 
*/ - tx->tx_cookie = kiblnd_data.kib_next_tx_cookie++; - - spin_unlock(&net->ibn_tx_lock); - - LASSERT (tx->tx_nwrq == 0); - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending == 0); - LASSERT (!tx->tx_waiting); - LASSERT (tx->tx_status == 0); - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); -#if IBLND_MAP_ON_DEMAND - LASSERT (tx->tx_fmr == NULL); -#else - LASSERT (tx->tx_nfrags == 0); -#endif - - return tx; -} - -void -kiblnd_drop_rx (kib_rx_t *rx) -{ - kib_conn_t *conn = rx->rx_conn; - unsigned long flags; - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - LASSERT (conn->ibc_nrx > 0); - conn->ibc_nrx--; - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); - - kiblnd_conn_decref(conn); -} - -int -kiblnd_post_rx (kib_rx_t *rx, int credit) -{ - kib_conn_t *conn = rx->rx_conn; - kib_net_t *net = conn->ibc_peer->ibp_ni->ni_data; - struct ib_recv_wr *bad_wrq; - int rc; - - LASSERT (net != NULL); - LASSERT (!in_interrupt()); - LASSERT (credit == IBLND_POSTRX_NO_CREDIT || - credit == IBLND_POSTRX_PEER_CREDIT || - credit == IBLND_POSTRX_RSRVD_CREDIT); - - rx->rx_sge.length = IBLND_MSG_SIZE; - rx->rx_sge.lkey = net->ibn_dev->ibd_mr->lkey; - rx->rx_sge.addr = rx->rx_msgaddr; - - rx->rx_wrq.next = NULL; - rx->rx_wrq.sg_list = &rx->rx_sge; - rx->rx_wrq.num_sge = 1; - rx->rx_wrq.wr_id = kiblnd_ptr2wreqid(rx, IBLND_WID_RX); - - LASSERT (conn->ibc_state >= IBLND_CONN_INIT); - LASSERT (rx->rx_nob >= 0); /* not posted */ - - if (conn->ibc_state > IBLND_CONN_ESTABLISHED) { - kiblnd_drop_rx(rx); /* No more posts for this rx */ - return 0; - } - - rx->rx_nob = -1; /* flag posted */ - - rc = ib_post_recv(conn->ibc_cmid->qp, &rx->rx_wrq, &bad_wrq); - - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) /* Initial post */ - return rc; - - if (rc != 0) { - CERROR("Can't post rx for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_close_conn(conn, rc); - kiblnd_drop_rx(rx); /* No more posts for 
this rx */ - return rc; - } - - if (credit == IBLND_POSTRX_NO_CREDIT) - return 0; - - spin_lock(&conn->ibc_lock); - if (credit == IBLND_POSTRX_PEER_CREDIT) - conn->ibc_outstanding_credits++; - else - conn->ibc_reserved_credits++; - spin_unlock(&conn->ibc_lock); - - kiblnd_check_sends(conn); - return 0; -} - -kib_tx_t * -kiblnd_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie) -{ - struct list_head *tmp; - - list_for_each(tmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending != 0 || tx->tx_waiting); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_waiting && - tx->tx_msg->ibm_type == txtype) - return tx; - - CWARN("Bad completion: %swaiting, type %x (wanted %x)\n", - tx->tx_waiting ? "" : "NOT ", - tx->tx_msg->ibm_type, txtype); - } - return NULL; -} - -void -kiblnd_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) -{ - kib_tx_t *tx; - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - int idle; - - spin_lock(&conn->ibc_lock); - - tx = kiblnd_find_waiting_tx_locked(conn, txtype, cookie); - if (tx == NULL) { - spin_unlock(&conn->ibc_lock); - - CWARN("Unmatched completion type %x cookie "LPX64" from %s\n", - txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_close_conn(conn, -EPROTO); - return; - } - - if (tx->tx_status == 0) { /* success so far */ - if (status < 0) { /* failed? 
*/ - tx->tx_status = status; - } else if (txtype == IBLND_MSG_GET_REQ) { - lnet_set_reply_msg_len(ni, tx->tx_lntmsg[1], status); - } - } - - tx->tx_waiting = 0; - - idle = !tx->tx_queued && (tx->tx_sending == 0); - if (idle) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (idle) - kiblnd_tx_done(ni, tx); -} - -void -kiblnd_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie) -{ - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - kib_tx_t *tx = kiblnd_get_idle_tx(ni); - - if (tx == NULL) { - CERROR("Can't get tx for completion %x for %s\n", - type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - tx->tx_msg->ibm_u.completion.ibcm_status = status; - tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie; - kiblnd_init_tx_msg(ni, tx, type, sizeof(kib_completion_msg_t)); - - kiblnd_queue_tx(tx, conn); -} - -void -kiblnd_handle_rx (kib_rx_t *rx) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - int credits = msg->ibm_credits; - kib_tx_t *tx; - int rc = 0; - int rc2; - int post_credit; - - LASSERT (conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - CDEBUG (D_NET, "Received %x[%d] from %s\n", - msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - if (credits != 0) { - /* Have I received credits that will let me send? 
*/ - spin_lock(&conn->ibc_lock); - - if (conn->ibc_credits + credits > IBLND_MSG_QUEUE_SIZE) { - rc2 = conn->ibc_credits; - spin_unlock(&conn->ibc_lock); - - CERROR("Bad credits from %s: %d + %d > %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - rc2, credits, IBLND_MSG_QUEUE_SIZE); - - kiblnd_close_conn(conn, -EPROTO); - kiblnd_post_rx(rx, IBLND_POSTRX_NO_CREDIT); - return; - } - - conn->ibc_credits += credits; - - spin_unlock(&conn->ibc_lock); - kiblnd_check_sends(conn); - } - - switch (msg->ibm_type) { - default: - CERROR("Bad IBLND message type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - post_credit = IBLND_POSTRX_NO_CREDIT; - rc = -EPROTO; - break; - - case IBLND_MSG_NOOP: - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_IMMEDIATE: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_PUT_REQ: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.putreq.ibprm_hdr, - msg->ibm_srcnid, rx, 1); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_PUT_NAK: - CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBLND_MSG_PUT_ACK: - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - - spin_lock(&conn->ibc_lock); - tx = kiblnd_find_waiting_tx_locked(conn, IBLND_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx != NULL) - list_del(&tx->tx_list); - spin_unlock(&conn->ibc_lock); - - if (tx == NULL) { - CERROR("Unmatched PUT_ACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - } - - LASSERT (tx->tx_waiting); - /* CAVEAT EMPTOR: I could be 
racing with tx_complete, but... - * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. */ - - tx->tx_nwrq = 0; /* overwrite PUT_REQ */ - - rc2 = kiblnd_init_rdma(ni, tx, IBLND_MSG_PUT_DONE, - kiblnd_rd_size(&msg->ibm_u.putack.ibpam_rd), - &msg->ibm_u.putack.ibpam_rd, - msg->ibm_u.putack.ibpam_dst_cookie); - if (rc2 < 0) - CERROR("Can't setup rdma for PUT to %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2); - - spin_lock(&conn->ibc_lock); - tx->tx_waiting = 0; /* clear waiting and queue atomically */ - kiblnd_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - break; - - case IBLND_MSG_PUT_DONE: - post_credit = IBLND_POSTRX_PEER_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_PUT_ACK, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBLND_MSG_GET_REQ: - post_credit = IBLND_POSTRX_DONT_POST; - rc = lnet_parse(ni, &msg->ibm_u.get.ibgm_hdr, - msg->ibm_srcnid, rx, 1); - if (rc < 0) /* repost on error */ - post_credit = IBLND_POSTRX_PEER_CREDIT; - break; - - case IBLND_MSG_GET_DONE: - post_credit = IBLND_POSTRX_RSRVD_CREDIT; - kiblnd_handle_completion(conn, IBLND_MSG_GET_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - } - - if (rc < 0) /* protocol error */ - kiblnd_close_conn(conn, rc); - - if (post_credit != IBLND_POSTRX_DONT_POST) - kiblnd_post_rx(rx, post_credit); -} - -void -kiblnd_rx_complete (kib_rx_t *rx, int status, int nob) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - kib_net_t *net = ni->ni_data; - unsigned long flags; - int rc; - int err = -EIO; - - LASSERT (net != NULL); - LASSERT (rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - - if (conn->ibc_state > IBLND_CONN_ESTABLISHED) - goto ignore; - - if (status != IB_WC_SUCCESS) { - CDEBUG(D_NETERROR, "Rx from %s failed: %d\n", - 
libcfs_nid2str(conn->ibc_peer->ibp_nid), status); - goto failed; - } - - LASSERT (nob >= 0); - rx->rx_nob = nob; - - rc = kiblnd_unpack_msg(msg, rx->rx_nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid || - msg->ibm_dstnid != ni->ni_nid || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != net->ibn_incarnation) { - CERROR ("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - err = -ESTALE; - goto failed; - } - - /* set time last known alive */ - kiblnd_peer_alive(conn->ibc_peer); - - /* racing with connection establishment/teardown! */ - - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - /* must check holding global lock to eliminate race */ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - return; - } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - } - kiblnd_handle_rx(rx); - return; - - failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - kiblnd_close_conn(conn, err); - ignore: - kiblnd_drop_rx(rx); /* Don't re-post rx. 
*/ -} - -struct page * -kiblnd_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) { - page = vmalloc_to_page ((void *)vaddr); - LASSERT (page != NULL); - return page; - } -#if CONFIG_HIGHMEM - if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) { - /* No highmem pages only used for bulk (kiov) I/O */ - CERROR("find page for address in highmem\n"); - LBUG(); - } -#endif - page = virt_to_page (vaddr); - LASSERT (page != NULL); - return page; -} - -#if !IBLND_MAP_ON_DEMAND -int -kiblnd_setup_rd_iov(lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - struct scatterlist *sg; - int i; - int fragnob; - unsigned long vaddr; - struct page *page; - int page_offset; - kib_net_t *net = ni->ni_data; - - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT (net != NULL); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - sg = tx->tx_frags; - do { - LASSERT (niov > 0); - - vaddr = ((unsigned long)iov->iov_base) + offset; - page_offset = vaddr & (PAGE_SIZE - 1); - page = kiblnd_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR ("Can't find page\n"); - return -EFAULT; - } - - fragnob = min((int)(iov->iov_len - offset), nob); - fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); - - sg->page = page; - sg->offset = page_offset; - sg->length = fragnob; - sg++; - - if (offset + fragnob < iov->iov_len) { - offset += fragnob; - } else { - offset = 0; - iov++; - niov--; - } - nob -= fragnob; - } while (nob > 0); - - /* If rd is not tx_rd, it's going to get sent to a peer and I'm the - * RDMA sink */ - tx->tx_nfrags = sg - tx->tx_frags; - tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - - rd->rd_nfrags = dma_map_sg(net->ibn_dev->ibd_cmid->device->dma_device, - tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir); - rd->rd_key = (rd != tx->tx_rd) ? 
- net->ibn_dev->ibd_mr->rkey : net->ibn_dev->ibd_mr->lkey; - - for (i = 0; i < rd->rd_nfrags; i++) { - rd->rd_frags[i].rf_nob = sg_dma_len(&tx->tx_frags[i]); - rd->rd_frags[i].rf_addr = sg_dma_address(&tx->tx_frags[i]); - } - - return 0; -} - -int -kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - struct scatterlist *sg; - int i; - int fragnob; - kib_net_t *net = ni->ni_data; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (net != NULL); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - sg = tx->tx_frags; - do { - LASSERT (nkiov > 0); - - fragnob = min((int)(kiov->kiov_len - offset), nob); - - memset(sg, 0, sizeof(*sg)); - sg->page = kiov->kiov_page; - sg->offset = kiov->kiov_offset + offset; - sg->length = fragnob; - sg++; - - offset = 0; - kiov++; - nkiov--; - nob -= fragnob; - } while (nob > 0); - - /* If rd is not tx_rd, it's going to get sent to a peer and I'm the - * RDMA sink */ - tx->tx_nfrags = sg - tx->tx_frags; - tx->tx_dmadir = (rd != tx->tx_rd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - - rd->rd_nfrags = dma_map_sg(net->ibn_dev->ibd_cmid->device->dma_device, - tx->tx_frags, tx->tx_nfrags, tx->tx_dmadir); - rd->rd_key = (rd != tx->tx_rd) ? 
- net->ibn_dev->ibd_mr->rkey : net->ibn_dev->ibd_mr->lkey; - - for (i = 0; i < tx->tx_nfrags; i++) { - rd->rd_frags[i].rf_nob = sg_dma_len(&tx->tx_frags[i]); - rd->rd_frags[i].rf_addr = sg_dma_address(&tx->tx_frags[i]); -#if 0 - CDEBUG(D_WARNING,"frag[%d]: "LPX64" for %d\n", - i, rd->rd_frags[i].rf_addr, rd->rd_frags[i].rf_nob); -#endif - } - - return 0; -} -#else -int -kiblnd_map_tx (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - int npages, unsigned long page_offset, int nob) -{ - struct ib_pool_fmr *fmr; - kib_net_t *net = ni->ni_data; - - LASSERT (net != NULL); - LASSERT (tx->tx_fmr == NULL); - LASSERT (page_offset < PAGE_SIZE); - LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT))); - LASSERT (npages <= LNET_MAX_IOV); - - rd->rd_addr = 0; - - fmr = ib_fmr_pool_map_phys(net->ibn_fmrpool, tx->tx_pages, - npages, rd->rd_addr); - if (IS_ERR(fmr)) { - CERROR ("Can't map %d pages: %ld\n", npages, PTR_ERR(fmr)); - return PTR_ERR(fmr); - } - - /* If rd is not tx_rd, it's going to get sent to a peer, who will need - * the rkey */ - - rd->rd_key = (rd != tx->tx_rd) ? 
fmr->fmr->rkey : fmr->fmr->lkey; - rd->rd_nob = nob; - - tx->tx_fmr = fmr; - return 0; -} - -int -kiblnd_setup_rd_iov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - int resid; - int fragnob; - struct page *page; - int npages; - unsigned long page_offset; - unsigned long vaddr; - - LASSERT (nob > 0); - LASSERT (niov > 0); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR ("Can't map multiple vaddr fragments\n"); - return (-EMSGSIZE); - } - - vaddr = ((unsigned long)iov->iov_base) + offset; - - page_offset = vaddr & (PAGE_SIZE - 1); - resid = nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - - page = kiblnd_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR("Can't find page for %lu\n", vaddr); - return -EFAULT; - } - - tx->tx_pages[npages++] = lnet_page2phys(page); - - fragnob = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1)); - vaddr += fragnob; - resid -= fragnob; - - } while (resid > 0); - - return kiblnd_map_tx(ni, tx, rd, npages, page_offset, nob); -} - -int -kiblnd_setup_rd_kiov (lnet_ni_t *ni, kib_tx_t *tx, kib_rdma_desc_t *rd, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - int resid; - int npages; - unsigned long page_offset; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (nkiov <= LNET_MAX_IOV); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - page_offset = kiov->kiov_offset + offset; - - resid = offset + nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - LASSERT (nkiov > 0); - - if ((npages > 0 && kiov->kiov_offset != 0) || - (resid > kiov->kiov_len && - (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) { - /* Can't have gaps */ - CERROR ("Can't make payload contiguous in I/O VM:" - "page %d, offset %d, 
len %d \n", - npages, kiov->kiov_offset, kiov->kiov_len); - - return -EINVAL; - } - - tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page); - resid -= kiov->kiov_len; - kiov++; - nkiov--; - } while (resid > 0); - - return kiblnd_map_tx(ni, tx, rd, npages, page_offset, nob); -} -#endif - -void -kiblnd_check_sends (kib_conn_t *conn) -{ - kib_tx_t *tx; - lnet_ni_t *ni = conn->ibc_peer->ibp_ni; - int rc; - int consume_cred = 0; - struct ib_send_wr *bad_wrq; - int done; - - /* Don't send anything until after the connection is established */ - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - CDEBUG(D_NET, "%s too soon\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - spin_lock(&conn->ibc_lock); - - LASSERT (conn->ibc_nsends_posted <= - *kiblnd_tunables.kib_concurrent_sends); - LASSERT (conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_nocred) && - (conn->ibc_outstanding_credits >= IBLND_CREDIT_HIGHWATER || - kiblnd_send_keepalive(conn))) { - spin_unlock(&conn->ibc_lock); - - tx = kiblnd_get_idle_tx(ni); - if (tx != NULL) - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_NOOP, 0); - - spin_lock(&conn->ibc_lock); - - if (tx != NULL) - kiblnd_queue_tx_locked(tx, conn); - } - - for (;;) { - if (!list_empty (&conn->ibc_tx_queue_nocred)) { - tx = list_entry (conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); - consume_cred = 0; - } else if (!list_empty (&conn->ibc_tx_queue)) { - tx = list_entry (conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - consume_cred = 1; - } else { - /* nothing to send right now */ - break; - } - - LASSERT (tx->tx_queued); - /* We rely on this for QP sizing */ - LASSERT (tx->tx_nwrq > 0 && - tx->tx_nwrq <= 1 + 
IBLND_MAX_RDMA_FRAGS); - - LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= IBLND_MSG_QUEUE_SIZE); - LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= IBLND_MSG_QUEUE_SIZE); - - if (conn->ibc_nsends_posted == - *kiblnd_tunables.kib_concurrent_sends) { - /* tx completions outstanding... */ - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (consume_cred) { - if (conn->ibc_credits == 0) { /* no credits */ - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) { /* giving back credits */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - } - - list_del (&tx->tx_list); - tx->tx_queued = 0; - - /* NB don't drop ibc_lock before bumping tx_sending */ - - if (tx->tx_msg->ibm_type == IBLND_MSG_NOOP && - (!list_empty(&conn->ibc_tx_queue) || - !list_empty(&conn->ibc_tx_queue_nocred) || - (conn->ibc_outstanding_credits < IBLND_CREDIT_HIGHWATER && - !kiblnd_send_keepalive(conn)))) { - /* redundant NOOP */ - spin_unlock(&conn->ibc_lock); - kiblnd_tx_done(ni, tx); - spin_lock(&conn->ibc_lock); - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - continue; - } - - kiblnd_pack_msg(ni, tx->tx_msg, conn->ibc_outstanding_credits, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation); - - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (consume_cred) - conn->ibc_credits--; - - /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA - * PUT. If so, it was first queued here as a PUT_REQ, sent and - * stashed on ibc_active_txs, matched by an incoming PUT_ACK, - * and then re-queued here. It's (just) possible that - * tx_sending is non-zero if we've not done the tx_complete() from - * the first send; hence the ++ rather than = below. 
*/ - tx->tx_sending++; - - list_add (&tx->tx_list, &conn->ibc_active_txs); -#if 0 - { - int i; - - for (i = 0; i < tx->tx_nwrq - 1; i++) { - LASSERT (tx->tx_wrq[i].opcode == IB_WR_RDMA_WRITE); - LASSERT (tx->tx_wrq[i].next == &tx->tx_wrq[i+1]); - LASSERT (tx->tx_wrq[i].sg_list == &tx->tx_sge[i]); - - CDEBUG(D_WARNING, "WORK[%d]: RDMA "LPX64 - " for %d k %x -> "LPX64" k %x\n", i, - tx->tx_wrq[i].sg_list->addr, - tx->tx_wrq[i].sg_list->length, - tx->tx_wrq[i].sg_list->lkey, - tx->tx_wrq[i].wr.rdma.remote_addr, - tx->tx_wrq[i].wr.rdma.rkey); - } - - LASSERT (tx->tx_wrq[i].opcode == IB_WR_SEND); - LASSERT (tx->tx_wrq[i].next == NULL); - LASSERT (tx->tx_wrq[i].sg_list == &tx->tx_sge[i]); - - CDEBUG(D_WARNING, "WORK[%d]: SEND "LPX64" for %d k %x\n", i, - tx->tx_wrq[i].sg_list->addr, - tx->tx_wrq[i].sg_list->length, - tx->tx_wrq[i].sg_list->lkey); - } -#endif - /* I'm still holding ibc_lock! */ - if (conn->ibc_state != IBLND_CONN_ESTABLISHED) - rc = -ECONNABORTED; - else - rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &bad_wrq); - - conn->ibc_last_send = jiffies; - - if (rc != 0) { - /* NB credits are transferred in the actual - * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; - if (consume_cred) - conn->ibc_credits++; - conn->ibc_nsends_posted--; - - tx->tx_status = rc; - tx->tx_waiting = 0; - tx->tx_sending--; - - done = (tx->tx_sending == 0); - if (done) - list_del (&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (conn->ibc_state == IBLND_CONN_ESTABLISHED) - CERROR("Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - else - CDEBUG(D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kiblnd_close_conn(conn, rc); - - if (done) - kiblnd_tx_done(ni, tx); - return; - } - } - - spin_unlock(&conn->ibc_lock); -} - -void -kiblnd_tx_complete (kib_tx_t *tx, int status) -{ - int failed = (status != IB_WC_SUCCESS); - kib_conn_t *conn 
= tx->tx_conn; - int idle; - - LASSERT (tx->tx_sending > 0); - - if (failed) { - if (conn->ibc_state == IBLND_CONN_ESTABLISHED) - CDEBUG(D_NETERROR, "Tx -> %s cookie "LPX64 - "sending %d waiting %d: failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_cookie, tx->tx_sending, tx->tx_waiting, - status); - - kiblnd_close_conn(conn, -EIO); - } else { - kiblnd_peer_alive(conn->ibc_peer); - } - - spin_lock(&conn->ibc_lock); - - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. */ - - tx->tx_sending--; - conn->ibc_nsends_posted--; - - if (failed) { - tx->tx_waiting = 0; /* don't wait for peer */ - tx->tx_status = -EIO; - } - - idle = (tx->tx_sending == 0) && /* This is the final callback */ - !tx->tx_waiting && /* Not waiting for peer */ - !tx->tx_queued; /* Not re-queued (PUT_DONE) */ - if (idle) - list_del(&tx->tx_list); - - kiblnd_conn_addref(conn); /* 1 ref for me.... */ - - spin_unlock(&conn->ibc_lock); - - if (idle) - kiblnd_tx_done(conn->ibc_peer->ibp_ni, tx); - - kiblnd_check_sends(conn); - - kiblnd_conn_decref(conn); /* ...until here */ -} - -void -kiblnd_init_tx_msg (lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob) -{ - kib_net_t *net = ni->ni_data; - struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq]; - struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - - LASSERT (net != NULL); - LASSERT (tx->tx_nwrq >= 0); - LASSERT (tx->tx_nwrq < IBLND_MAX_RDMA_FRAGS + 1); - LASSERT (nob <= IBLND_MSG_SIZE); - - kiblnd_init_msg(tx->tx_msg, type, body_nob); - - sge->addr = tx->tx_msgaddr; - sge->lkey = net->ibn_dev->ibd_mr->lkey; - sge->length = nob; - - memset(wrq, 0, sizeof(*wrq)); - - wrq->next = NULL; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_TX); - wrq->sg_list = sge; - wrq->num_sge = 1; - wrq->opcode = IB_WR_SEND; - wrq->send_flags = IB_SEND_SIGNALED; - - tx->tx_nwrq++; -} - -int -kiblnd_init_rdma (lnet_ni_t *ni, kib_tx_t *tx, 
int type, - int nob, kib_rdma_desc_t *dstrd, __u64 dstcookie) -{ - kib_msg_t *ibmsg = tx->tx_msg; - kib_rdma_desc_t *srcrd = tx->tx_rd; - struct ib_sge *sge = &tx->tx_sge[0]; - struct ib_send_wr *wrq = &tx->tx_wrq[0]; - int rc = nob; - -#if IBLND_MAP_ON_DEMAND - LASSERT (!in_interrupt()); - LASSERT (tx->tx_nwrq == 0); - LASSERT (type == IBLND_MSG_GET_DONE || - type == IBLND_MSG_PUT_DONE); - - sge->addr = srcrd->rd_addr; - sge->lkey = srcrd->rd_key; - sge->length = nob; - - wrq = &tx->tx_wrq[0]; - - wrq->next = &tx->tx_wrq[1]; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); - wrq->sg_list = sge; - wrq->num_sge = 1; - wrq->opcode = IB_WR_RDMA_WRITE; - wrq->send_flags = 0; - - wrq->wr.rdma.remote_addr = dstrd->rd_addr; - wrq->wr.rdma.rkey = dstrd->rd_key; - - tx->tx_nwrq = 1; -#else - /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */ - int resid = nob; - kib_rdma_frag_t *srcfrag; - int srcidx; - kib_rdma_frag_t *dstfrag; - int dstidx; - int wrknob; - - LASSERT (!in_interrupt()); - LASSERT (tx->tx_nwrq == 0); - LASSERT (type == IBLND_MSG_GET_DONE || - type == IBLND_MSG_PUT_DONE); - - srcidx = dstidx = 0; - srcfrag = &srcrd->rd_frags[0]; - dstfrag = &dstrd->rd_frags[0]; - - while (resid > 0) { - if (srcidx >= srcrd->rd_nfrags) { - CERROR("Src buffer exhausted: %d frags\n", srcidx); - rc = -EPROTO; - break; - } - - if (dstidx == dstrd->rd_nfrags) { - CERROR("Dst buffer exhausted: %d frags\n", dstidx); - rc = -EPROTO; - break; - } - - if (tx->tx_nwrq == IBLND_MAX_RDMA_FRAGS) { - CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n", - srcidx, srcrd->rd_nfrags, - dstidx, dstrd->rd_nfrags); - rc = -EMSGSIZE; - break; - } - - wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid); - - sge = &tx->tx_sge[tx->tx_nwrq]; - sge->addr = srcfrag->rf_addr; - sge->length = wrknob; - sge->lkey = srcrd->rd_key; - - wrq = &tx->tx_wrq[tx->tx_nwrq]; - - wrq->next = wrq + 1; - wrq->wr_id = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA); - wrq->sg_list = sge; - wrq->num_sge = 1; 
- wrq->opcode = IB_WR_RDMA_WRITE; - wrq->send_flags = 0; - - wrq->wr.rdma.remote_addr = dstfrag->rf_addr; - wrq->wr.rdma.rkey = dstrd->rd_key; - - wrq++; - sge++; - - resid -= wrknob; - if (wrknob < srcfrag->rf_nob) { - srcfrag->rf_nob -= wrknob; - srcfrag->rf_addr += wrknob; - } else { - srcfrag++; - srcidx++; - } - - if (wrknob < dstfrag->rf_nob) { - dstfrag->rf_nob -= wrknob; - dstfrag->rf_addr += wrknob; - } else { - dstfrag++; - dstidx++; - } - - tx->tx_nwrq++; - } - - if (rc < 0) /* no RDMA if completing with failure */ - tx->tx_nwrq = 0; -#endif - ibmsg->ibm_u.completion.ibcm_status = rc; - ibmsg->ibm_u.completion.ibcm_cookie = dstcookie; - kiblnd_init_tx_msg(ni, tx, type, sizeof (kib_completion_msg_t)); - - return rc; -} - -void -kiblnd_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) -{ - struct list_head *q; - - LASSERT (tx->tx_nwrq > 0); /* work items set up */ - LASSERT (!tx->tx_queued); /* not queued for sending already */ - - tx->tx_queued = 1; - tx->tx_deadline = jiffies + (*kiblnd_tunables.kib_timeout * HZ); - - if (tx->tx_conn == NULL) { - kiblnd_conn_addref(conn); - tx->tx_conn = conn; - LASSERT (tx->tx_msg->ibm_type != IBLND_MSG_PUT_DONE); - } else { - /* PUT_DONE first attached to conn as a PUT_REQ */ - LASSERT (tx->tx_conn == conn); - LASSERT (tx->tx_msg->ibm_type == IBLND_MSG_PUT_DONE); - } - - switch (tx->tx_msg->ibm_type) { - default: - LBUG(); - - case IBLND_MSG_PUT_REQ: - case IBLND_MSG_GET_REQ: - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBLND_MSG_PUT_NAK: - case IBLND_MSG_PUT_ACK: - case IBLND_MSG_PUT_DONE: - case IBLND_MSG_GET_DONE: - q = &conn->ibc_tx_queue_nocred; - break; - - case IBLND_MSG_NOOP: - case IBLND_MSG_IMMEDIATE: - q = &conn->ibc_tx_queue; - break; - } - - list_add_tail(&tx->tx_list, q); -} - -void -kiblnd_queue_tx (kib_tx_t *tx, kib_conn_t *conn) -{ - spin_lock(&conn->ibc_lock); - kiblnd_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - - kiblnd_check_sends(conn); -} - -void -kiblnd_connect_peer 
(kib_peer_t *peer) -{ - struct rdma_cm_id *cmid; - struct sockaddr_in sockaddr; - int rc; - - LASSERT (peer->ibp_connecting > 0); - - cmid = rdma_create_id(kiblnd_cm_callback, peer, RDMA_PS_TCP); - if (IS_ERR(cmid)) { - CERROR("Can't create CMID for %s: %ld\n", - libcfs_nid2str(peer->ibp_nid), PTR_ERR(cmid)); - rc = PTR_ERR(cmid); - goto failed; - } - - memset(&sockaddr, 0, sizeof(sockaddr)); - sockaddr.sin_family = AF_INET; - sockaddr.sin_port = htons(*kiblnd_tunables.kib_service); - sockaddr.sin_addr.s_addr = htonl(LNET_NIDADDR(peer->ibp_nid)); - - kiblnd_peer_addref(peer); /* cmid's ref */ - - rc = rdma_resolve_addr(cmid, NULL, (struct sockaddr *)&sockaddr, - *kiblnd_tunables.kib_timeout * 1000); - if (rc == 0) - return; - - /* Can't initiate address resolution: */ - CERROR("Can't resolve addr for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - - kiblnd_peer_decref(peer); /* cmid's ref */ - rdma_destroy_id(cmid); - failed: - kiblnd_peer_connect_failed(peer, 1, rc); -} - -void -kiblnd_launch_tx (lnet_ni_t *ni, kib_tx_t *tx, lnet_nid_t nid) -{ - kib_peer_t *peer; - kib_peer_t *peer2; - kib_conn_t *conn; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - unsigned long flags; - int rc; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx->tx_nwrq > 0); /* work items have been set up */ - - /* First time, just use a read lock since I expect to find my peer - * connected */ - read_lock_irqsave(g_lock, flags); - - peer = kiblnd_find_peer_locked(nid); - if (peer != NULL && !list_empty(&peer->ibp_conns)) { - /* Found a peer with an established connection */ - conn = kiblnd_get_conn_locked(peer); - kiblnd_conn_addref(conn); /* 1 ref for me... 
*/ - - read_unlock_irqrestore(g_lock, flags); - - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - return; - } - - read_unlock(g_lock); - /* Re-try with a write lock */ - write_lock(g_lock); - - peer = kiblnd_find_peer_locked(nid); - if (peer != NULL) { - if (list_empty(&peer->ibp_conns)) { - /* found a peer, but it's still connecting... */ - LASSERT (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0); - list_add_tail (&tx->tx_list, &peer->ibp_tx_queue); - write_unlock_irqrestore(g_lock, flags); - } else { - conn = kiblnd_get_conn_locked(peer); - kiblnd_conn_addref(conn); /* 1 ref for me... */ - - write_unlock_irqrestore(g_lock, flags); - - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - return; - } - - write_unlock_irqrestore(g_lock, flags); - - /* Allocate a peer ready to add to the peer table and retry */ - rc = kiblnd_create_peer(ni, &peer, nid); - if (rc != 0) { - CERROR("Can't create peer %s\n", libcfs_nid2str(nid)); - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kiblnd_tx_done(ni, tx); - return; - } - - write_lock_irqsave(g_lock, flags); - - peer2 = kiblnd_find_peer_locked(nid); - if (peer2 != NULL) { - if (list_empty(&peer2->ibp_conns)) { - /* found a peer, but it's still connecting... */ - LASSERT (peer2->ibp_connecting != 0 || - peer2->ibp_accepting != 0); - list_add_tail (&tx->tx_list, &peer2->ibp_tx_queue); - write_unlock_irqrestore(g_lock, flags); - } else { - conn = kiblnd_get_conn_locked(peer2); - kiblnd_conn_addref(conn); /* 1 ref for me... 
*/ - - write_unlock_irqrestore(g_lock, flags); - - kiblnd_queue_tx(tx, conn); - kiblnd_conn_decref(conn); /* ...to here */ - } - - kiblnd_peer_decref(peer); - return; - } - - /* Brand new peer */ - LASSERT (peer->ibp_connecting == 0); - peer->ibp_connecting = 1; - - list_add_tail(&tx->tx_list, &peer->ibp_tx_queue); - - kiblnd_peer_addref(peer); - list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid)); - - write_unlock_irqrestore(g_lock, flags); - - kiblnd_connect_peer(peer); - kiblnd_peer_decref(peer); -} - -int -kiblnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kib_msg_t *ibmsg; - kib_tx_t *tx; - int nob; - int rc; - - /* NB 'private' is different depending on what we're sending.... */ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* Thread context */ - LASSERT (!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - switch (type) { - default: - LBUG(); - return (-EIO); - - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? 
*/ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBLND_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR("Can allocate txd for GET to %s: \n", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.get.ibgm_hdr = *hdr; - ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - rc = kiblnd_setup_rd_iov(ni, tx, - &ibmsg->ibm_u.get.ibgm_rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kiblnd_setup_rd_kiov(ni, tx, - &ibmsg->ibm_u.get.ibgm_rd, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc != 0) { - CERROR("Can't setup GET sink for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kiblnd_tx_done(ni, tx); - return -EIO; - } -#if IBLND_MAP_ON_DEMAND - nob = sizeof(kib_get_msg_t); -#else - nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[tx->tx_nfrags]); -#endif - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_GET_REQ, nob); - - tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR("Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */ - tx->tx_waiting = 1; /* waiting for GET_DONE */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBLND_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR("Can't allocate %s txd for %s\n", - type == LNET_MSG_PUT ? 
"PUT" : "REPLY", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - if (payload_kiov == NULL) - rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, - payload_niov, payload_iov, - payload_offset, payload_nob); - else - rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, - payload_niov, payload_kiov, - payload_offset, payload_nob); - if (rc != 0) { - CERROR("Can't setup PUT src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kiblnd_tx_done(ni, tx); - return -EIO; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.putreq.ibprm_hdr = *hdr; - ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_REQ, sizeof(kib_putreq_msg_t)); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]) - <= IBLND_MSG_SIZE); - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR ("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(IBLND_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBLND_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_iov, - payload_offset, payload_nob); - - nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]); - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_IMMEDIATE, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - kiblnd_launch_tx(ni, tx, target.nid); - return 0; -} - -void -kiblnd_reply (lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) -{ - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - 
lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - kib_tx_t *tx; - int rc; - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR("Can't get tx for REPLY to %s\n", - libcfs_nid2str(target.nid)); - goto failed_0; - } - - if (nob == 0) - rc = 0; - else if (kiov == NULL) - rc = kiblnd_setup_rd_iov(ni, tx, tx->tx_rd, - niov, iov, offset, nob); - else - rc = kiblnd_setup_rd_kiov(ni, tx, tx->tx_rd, - niov, kiov, offset, nob); - - if (rc != 0) { - CERROR("Can't setup GET src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - rc = kiblnd_init_rdma(ni, tx, IBLND_MSG_GET_DONE, nob, - &rx->rx_msg->ibm_u.get.ibgm_rd, - rx->rx_msg->ibm_u.get.ibgm_cookie); - if (rc < 0) { - CERROR("Can't setup rdma for GET from %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - if (nob == 0) { - /* No RDMA: local completion may happen now! */ - lnet_finalize(ni, lntmsg, 0); - } else { - /* RDMA: lnet_finalize(lntmsg) when it - * completes */ - tx->tx_lntmsg[0] = lntmsg; - } - - kiblnd_queue_tx(tx, rx->rx_conn); - return; - - failed_1: - kiblnd_tx_done(ni, tx); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kiblnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kib_rx_t *rx = private; - kib_msg_t *rxmsg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - kib_tx_t *tx; - kib_msg_t *txmsg; - int nob; - int post_credit = IBLND_POSTRX_PEER_CREDIT; - int rc = 0; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBLND_MSG_IMMEDIATE: - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - 
libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov(niov, kiov, offset, - IBLND_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - IBLND_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - lnet_finalize (ni, lntmsg, 0); - break; - - case IBLND_MSG_PUT_REQ: - if (mlen == 0) { - lnet_finalize(ni, lntmsg, 0); - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, 0, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - tx = kiblnd_get_idle_tx(ni); - if (tx == NULL) { - CERROR("Can't allocate tx for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* Not replying will break the connection */ - rc = -ENOMEM; - break; - } - - txmsg = tx->tx_msg; - if (kiov == NULL) - rc = kiblnd_setup_rd_iov(ni, tx, - &txmsg->ibm_u.putack.ibpam_rd, - niov, iov, offset, mlen); - else - rc = kiblnd_setup_rd_kiov(ni, tx, - &txmsg->ibm_u.putack.ibpam_rd, - niov, kiov, offset, mlen); - if (rc != 0) { - CERROR("Can't setup PUT sink for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kiblnd_tx_done(ni, tx); - /* tell peer it's over */ - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_PUT_NAK, rc, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie; - txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie; -#if IBLND_MAP_ON_DEMAND - nob = sizeof(kib_putack_msg_t); -#else - nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[tx->tx_nfrags]); -#endif - kiblnd_init_tx_msg(ni, tx, IBLND_MSG_PUT_ACK, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_DONE */ - kiblnd_queue_tx(tx, conn); - - /* reposted buffer reserved for PUT_DONE */ - post_credit = IBLND_POSTRX_NO_CREDIT; - break; - - case IBLND_MSG_GET_REQ: - if (lntmsg != NULL) { - /* Optimized GET; 
RDMA lntmsg's payload */ - kiblnd_reply(ni, rx, lntmsg); - } else { - /* GET didn't match anything */ - kiblnd_send_completion(rx->rx_conn, IBLND_MSG_GET_DONE, - -ENODATA, - rxmsg->ibm_u.get.ibgm_cookie); - } - break; - } - - kiblnd_post_rx(rx, post_credit); - return rc; -} - -int -kiblnd_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kiblnd_data.kib_nthreads); - return (0); -} - -void -kiblnd_thread_fini (void) -{ - atomic_dec (&kiblnd_data.kib_nthreads); -} - -void -kiblnd_peer_alive (kib_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -void -kiblnd_peer_notify (kib_peer_t *peer) -{ - time_t last_alive = 0; - int error = 0; - unsigned long flags; - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { - error = peer->ibp_error; - peer->ibp_error = 0; - - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive); - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (error != 0) - lnet_notify(peer->ibp_ni, - peer->ibp_nid, 0, last_alive); -} - -void -kiblnd_close_conn_locked (kib_conn_t *conn, int error) -{ - /* This just does the immediate housekeeping. 'error' is zero for a - * normal shutdown which can happen only after the connection has been - * established. If the connection is established, schedule the - * connection to be finished off by the connd. Otherwise the connd is - * already dealing with it (either to set it up or tear it down). 
- * Caller holds kib_global_lock exclusively in irq context */ - unsigned long flags; - kib_peer_t *peer = conn->ibc_peer; - - LASSERT (error != 0 || conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - if (error != 0 && conn->ibc_comms_error == 0) - conn->ibc_comms_error = error; - - if (conn->ibc_state != IBLND_CONN_ESTABLISHED) - return; /* already being handled */ - - if (error == 0 && - list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_rsrvd) && - list_empty(&conn->ibc_tx_queue_nocred) && - list_empty(&conn->ibc_active_txs)) { - CDEBUG(D_NET, "closing conn to %s\n", - libcfs_nid2str(peer->ibp_nid)); - } else { - CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? "" : "(waiting)"); - } - - list_del (&conn->ibc_list); - /* connd (see below) takes over ibc_list's ref */ - - if (list_empty (&peer->ibp_conns) && /* no more conns */ - kiblnd_peer_active(peer)) { /* still in peer table */ - kiblnd_unlink_peer_locked(peer); - - /* set/clear error on last conn */ - peer->ibp_error = conn->ibc_comms_error; - } - - kiblnd_set_conn_state(conn, IBLND_CONN_CLOSING); - - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); - - list_add_tail (&conn->ibc_list, &kiblnd_data.kib_connd_conns); - wake_up (&kiblnd_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); -} - -void -kiblnd_close_conn (kib_conn_t *conn, int error) -{ - unsigned long flags; - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - kiblnd_close_conn_locked(conn, error); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -void -kiblnd_handle_early_rxs(kib_conn_t *conn) -{ - unsigned long flags; - kib_rx_t *rx; - - LASSERT (!in_interrupt()); - LASSERT 
(conn->ibc_state >= IBLND_CONN_ESTABLISHED); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - while (!list_empty(&conn->ibc_early_rxs)) { - rx = list_entry(conn->ibc_early_rxs.next, - kib_rx_t, rx_list); - list_del(&rx->rx_list); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_handle_rx(rx); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - } - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -void -kiblnd_abort_txs(kib_conn_t *conn, struct list_head *txs) -{ - LIST_HEAD (zombies); - struct list_head *tmp; - struct list_head *nxt; - kib_tx_t *tx; - - spin_lock(&conn->ibc_lock); - - list_for_each_safe (tmp, nxt, txs) { - tx = list_entry (tmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || - tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_queued = 0; - tx->tx_waiting = 0; - - if (tx->tx_sending == 0) { - list_del (&tx->tx_list); - list_add (&tx->tx_list, &zombies); - } - } - - spin_unlock(&conn->ibc_lock); - - kiblnd_txlist_done(conn->ibc_peer->ibp_ni, - &zombies, -ECONNABORTED); -} - -void -kiblnd_finalise_conn (kib_conn_t *conn) -{ - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state > IBLND_CONN_INIT); - - kiblnd_set_conn_state(conn, IBLND_CONN_DISCONNECTED); - - /* abort_receives moves QP state to IB_QPS_ERR. This is only required - * for connections that didn't get as far as being connected, because - * rdma_disconnect() does this for free. */ - kiblnd_abort_receives(conn); - - /* Complete all tx descs not waiting for sends to complete. 
- * NB we should be safe from RDMA now that the QP has changed state */ - - kiblnd_abort_txs(conn, &conn->ibc_tx_queue); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kiblnd_abort_txs(conn, &conn->ibc_tx_queue_nocred); - kiblnd_abort_txs(conn, &conn->ibc_active_txs); - - kiblnd_handle_early_rxs(conn); -} - -void -kiblnd_peer_connect_failed (kib_peer_t *peer, int active, int error) -{ - LIST_HEAD (zombies); - unsigned long flags; - - LASSERT (error != 0); - LASSERT (!in_interrupt()); - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - if (active) { - LASSERT (peer->ibp_connecting > 0); - peer->ibp_connecting--; - } else { - LASSERT (peer->ibp_accepting > 0); - peer->ibp_accepting--; - } - - if (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0) { - /* another connection attempt under way... */ - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return; - } - - if (list_empty(&peer->ibp_conns)) { - /* Take peer's blocked transmits to complete with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kiblnd_peer_active(peer)) - kiblnd_unlink_peer_locked(peer); - - peer->ibp_error = error; - } else { - /* Can't have blocked transmits if there are connections */ - LASSERT (list_empty(&peer->ibp_tx_queue)); - } - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - kiblnd_peer_notify(peer); - - if (list_empty (&zombies)) - return; - - CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kiblnd_txlist_done(peer->ibp_ni, &zombies, -EHOSTUNREACH); -} - -void -kiblnd_connreq_done(kib_conn_t *conn, int status) -{ - struct list_head txs; - - kib_peer_t *peer = conn->ibc_peer; - int active; - unsigned long flags; - kib_tx_t *tx; - - active = (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - - CDEBUG(D_NET,"%s: %d, %d\n", libcfs_nid2str(peer->ibp_nid), - active, status); - - LASSERT (!in_interrupt()); - LASSERT 
((conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT && - peer->ibp_connecting > 0) || - (conn->ibc_state == IBLND_CONN_PASSIVE_WAIT && - peer->ibp_accepting > 0)); - - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - conn->ibc_connvars = NULL; - - if (status != 0) { - /* failed to establish connection */ - kiblnd_peer_connect_failed(conn->ibc_peer, active, status); - kiblnd_finalise_conn(conn); - return; - } - - /* connection established */ - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - conn->ibc_last_send = jiffies; - kiblnd_set_conn_state(conn, IBLND_CONN_ESTABLISHED); - kiblnd_peer_alive(peer); - - /* Add conn to peer's list and nuke any dangling conns from a different - * peer instance... */ - kiblnd_conn_addref(conn); /* +1 ref for ibc_list */ - list_add(&conn->ibc_list, &peer->ibp_conns); - if (active) - peer->ibp_connecting--; - else - peer->ibp_accepting--; - - kiblnd_close_stale_conns_locked(conn->ibc_peer, - conn->ibc_incarnation); - - if (!kiblnd_peer_active(peer) || /* peer has been deleted */ - conn->ibc_comms_error != 0) { /* error has happened already */ - - /* start to shut down connection */ - kiblnd_close_conn_locked(conn, -ECONNABORTED); - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - return; - } - - /* grab pending txs while I have the lock */ - list_add(&txs, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - /* Schedule blocked txs */ - spin_lock (&conn->ibc_lock); - while (!list_empty (&txs)) { - tx = list_entry (txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - kiblnd_queue_tx_locked(tx, conn); - } - spin_unlock (&conn->ibc_lock); - - kiblnd_check_sends(conn); - - /* schedule blocked rxs */ - kiblnd_handle_early_rxs(conn); -} - -void -kiblnd_reject(struct rdma_cm_id *cmid, int why) -{ - int rc; - kib_rej_t rej = {.ibr_magic = IBLND_MSG_MAGIC, - .ibr_version = IBLND_MSG_VERSION, - .ibr_why = why}; - - rc = 
rdma_reject(cmid, &rej, sizeof(rej)); - - if (rc != 0) - CWARN("Error %d sending reject\n", rc); -} - -int -kiblnd_passive_connect (struct rdma_cm_id *cmid, void *priv, int priv_nob) -{ - kib_msg_t *ackmsg; - kib_msg_t *reqmsg = priv; - rwlock_t *g_lock = &kiblnd_data.kib_global_lock; - struct rdma_conn_param cp; - unsigned long flags; - lnet_ni_t *ni = NULL; - kib_dev_t *ibdev; - kib_peer_t *peer; - kib_peer_t *peer2; - kib_conn_t *conn; - lnet_nid_t nid; - int rc; - int rej = IBLND_REJECT_FATAL; - - LASSERT (!in_interrupt()); - - /* cmid inherits 'context' from the corresponding listener id */ - ibdev = (kib_dev_t *)cmid->context; - LASSERT (ibdev != NULL); - - if (priv_nob < offsetof(kib_msg_t, ibm_type)) { - CERROR("Short connection request\n"); - goto failed; - } - - if (reqmsg->ibm_magic == LNET_PROTO_MAGIC || - reqmsg->ibm_magic == __swab32(LNET_PROTO_MAGIC) || - (reqmsg->ibm_magic == IBLND_MSG_MAGIC && - reqmsg->ibm_version != IBLND_MSG_VERSION) || - (reqmsg->ibm_magic == __swab32(IBLND_MSG_MAGIC) && - reqmsg->ibm_version != __swab16(IBLND_MSG_VERSION))) { - /* Future protocol version compatibility support! If the - * o2iblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will - * negotiate a protocol version. I trap this here to avoid - * console errors; the reject tells the peer which protocol I - * speak. 
*/ - goto failed; - } - - rc = kiblnd_unpack_msg(reqmsg, priv_nob); - if (rc != 0) { - CERROR("Can't parse connection request: %d\n", rc); - goto failed; - } - - nid = reqmsg->ibm_srcnid; - - if (reqmsg->ibm_type != IBLND_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - reqmsg->ibm_type, libcfs_nid2str(nid)); - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_queue_depth != IBLND_MSG_QUEUE_SIZE) { - CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_queue_depth, - IBLND_MSG_QUEUE_SIZE); - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) { - CERROR("Can't accept %s: incompatible max_frags %d (%d wanted)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_queue_depth, - IBLND_MSG_QUEUE_SIZE); - goto failed; - } - - if (reqmsg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("Can't accept %s: message size %d too big (%d max)\n", - libcfs_nid2str(nid), - reqmsg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - goto failed; - } - - ni = lnet_net2ni(LNET_NIDNET(reqmsg->ibm_dstnid)); - if (ni == NULL || /* no matching net */ - ni->ni_nid != reqmsg->ibm_dstnid || /* right NET, wrong NID! 
*/ - ((kib_net_t*)ni->ni_data)->ibn_dev != ibdev) { /* wrong device */ - CERROR("Can't accept %s: bad dst nid %s\n", - libcfs_nid2str(nid), - libcfs_nid2str(reqmsg->ibm_dstnid)); - - goto failed; - } - - /* assume 'nid' is a new peer; create */ - rc = kiblnd_create_peer(ni, &peer, nid); - if (rc != 0) { - CERROR("Can't create peer for %s\n", libcfs_nid2str(nid)); - rej = IBLND_REJECT_NO_RESOURCES; - goto failed; - } - - write_lock_irqsave(g_lock, flags); - - peer2 = kiblnd_find_peer_locked(nid); - if (peer2 != NULL) { - /* tie-break connection race in favour of the higher NID */ - if (peer2->ibp_connecting != 0 && - nid < ni->ni_nid) { - write_unlock_irqrestore(g_lock, flags); - - CWARN("Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); - - kiblnd_peer_decref(peer); - rej = IBLND_REJECT_CONN_RACE; - goto failed; - } - - peer2->ibp_accepting++; - kiblnd_peer_addref(peer2); - - write_unlock_irqrestore(g_lock, flags); - kiblnd_peer_decref(peer); - peer = peer2; - } else { - /* Brand new peer */ - LASSERT (peer->ibp_accepting == 0); - peer->ibp_accepting = 1; - - kiblnd_peer_addref(peer); - list_add_tail(&peer->ibp_list, kiblnd_nid2peerlist(nid)); - - write_unlock_irqrestore(g_lock, flags); - } - - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_PASSIVE_WAIT); - if (conn == NULL) { - kiblnd_peer_connect_failed(peer, 0, -ENOMEM); - kiblnd_peer_decref(peer); - rej = IBLND_REJECT_NO_RESOURCES; - goto failed; - } - - /* conn now "owns" cmid, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. 
*/ - - conn->ibc_incarnation = reqmsg->ibm_srcstamp; - conn->ibc_credits = IBLND_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBLND_RX_MSGS); - - ackmsg = &conn->ibc_connvars->cv_msg; - memset(ackmsg, 0, sizeof(*ackmsg)); - - kiblnd_init_msg(ackmsg, IBLND_MSG_CONNACK, - sizeof(ackmsg->ibm_u.connparams)); - ackmsg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE; - ackmsg->ibm_u.connparams.ibcp_max_frags = IBLND_MAX_RDMA_FRAGS; - ackmsg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - kiblnd_pack_msg(ni, ackmsg, 0, nid, reqmsg->ibm_srcstamp); - - memset(&cp, 0, sizeof(cp)); - cp.private_data = ackmsg; - cp.private_data_len = ackmsg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - - CDEBUG(D_NET, "Accept %s\n", libcfs_nid2str(nid)); - - rc = rdma_accept(cmid, &cp); - if (rc != 0) { - CERROR("Can't accept %s: %d\n", libcfs_nid2str(nid), rc); - kiblnd_reject(cmid, IBLND_REJECT_FATAL); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } - - lnet_ni_decref(ni); - return 0; - - failed: - if (ni != NULL) - lnet_ni_decref(ni); - - kiblnd_reject(cmid, rej); - return -ECONNREFUSED; -} - -void -kiblnd_reconnect (kib_conn_t *conn, char *why) -{ - kib_peer_t *peer = conn->ibc_peer; - int retry = 0; - unsigned long flags; - - LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - LASSERT (peer->ibp_connecting > 0); /* 'conn' at least */ - - write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - /* retry connection if it's still needed and no other connection - * attempts (active or passive) are in progress */ - if (!list_empty(&peer->ibp_tx_queue) && - peer->ibp_connecting == 1 && - peer->ibp_accepting == 0) { - retry = 1; - peer->ibp_connecting++; - } - - 
write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (retry) { - CDEBUG(D_NETERROR, "%s: retrying (%s)\n", - libcfs_nid2str(peer->ibp_nid), why); - kiblnd_connect_peer(peer); - } -} - -void -kiblnd_rejected (kib_conn_t *conn, int reason, void *priv, int priv_nob) -{ - kib_peer_t *peer = conn->ibc_peer; - - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT); - - switch (reason) { - case IB_CM_REJ_STALE_CONN: - kiblnd_reconnect(conn, "stale"); - break; - - case IB_CM_REJ_CONSUMER_DEFINED: - if (priv_nob >= sizeof(kib_rej_t)) { - kib_rej_t *rej = priv; - - if (rej->ibr_magic == __swab32(IBLND_MSG_MAGIC) || - rej->ibr_magic == __swab32(LNET_PROTO_MAGIC)) { - __swab32s(&rej->ibr_magic); - __swab16s(&rej->ibr_version); - } - - if (rej->ibr_magic != IBLND_MSG_MAGIC && - rej->ibr_magic != LNET_PROTO_MAGIC) { - CERROR("%s rejected: consumer defined fatal error\n", - libcfs_nid2str(peer->ibp_nid)); - break; - } - - if (rej->ibr_version != IBLND_MSG_VERSION) { - CERROR("%s rejected: o2iblnd version %d error\n", - libcfs_nid2str(peer->ibp_nid), - rej->ibr_version); - break; - } - - switch (rej->ibr_why) { - case IBLND_REJECT_CONN_RACE: - kiblnd_reconnect(conn, "conn race"); - break; - - case IBLND_REJECT_NO_RESOURCES: - CERROR("%s rejected: o2iblnd no resources\n", - libcfs_nid2str(peer->ibp_nid)); - break; - case IBLND_REJECT_FATAL: - CERROR("%s rejected: o2iblnd fatal error\n", - libcfs_nid2str(peer->ibp_nid)); - break; - default: - CERROR("%s rejected: o2iblnd reason %d\n", - libcfs_nid2str(peer->ibp_nid), - rej->ibr_why); - break; - } - break; - } - /* fall through */ - default: - CDEBUG(D_NETERROR, "%s rejected: reason %d, size %d\n", - libcfs_nid2str(peer->ibp_nid), reason, priv_nob); - break; - } - - kiblnd_connreq_done(conn, -ECONNREFUSED); -} - -void -kiblnd_check_connreply (kib_conn_t *conn, void *priv, int priv_nob) -{ - kib_peer_t *peer = conn->ibc_peer; - lnet_ni_t *ni = peer->ibp_ni; - kib_net_t *net = ni->ni_data; 
- kib_msg_t *msg = priv; - int rc = kiblnd_unpack_msg(msg, priv_nob); - unsigned long flags; - - LASSERT (net != NULL); - - if (rc != 0) { - CERROR("Can't unpack connack from %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - goto failed; - } - - if (msg->ibm_type != IBLND_MSG_CONNACK) { - CERROR("Unexpected message %d from %s\n", - msg->ibm_type, libcfs_nid2str(peer->ibp_nid)); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth != IBLND_MSG_QUEUE_SIZE) { - CERROR("%s has incompatible queue depth %d(%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_queue_depth, - IBLND_MSG_QUEUE_SIZE); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_max_frags != IBLND_MAX_RDMA_FRAGS) { - CERROR("%s has incompatible max_frags %d (%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_queue_depth, - IBLND_MSG_QUEUE_SIZE); - rc = -EPROTO; - goto failed; - } - - if (msg->ibm_u.connparams.ibcp_max_msg_size > IBLND_MSG_SIZE) { - CERROR("%s max message size %d too big (%d max)\n", - libcfs_nid2str(peer->ibp_nid), - msg->ibm_u.connparams.ibcp_max_msg_size, - IBLND_MSG_SIZE); - rc = -EPROTO; - goto failed; - } - - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - if (msg->ibm_dstnid == ni->ni_nid && - msg->ibm_dststamp == net->ibn_incarnation) - rc = 0; - else - rc = -ESTALE; - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - - if (rc != 0) { - CERROR("Stale connection reply from %s\n", - libcfs_nid2str(peer->ibp_nid)); - goto failed; - } - - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = IBLND_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBLND_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBLND_RX_MSGS); - - kiblnd_connreq_done(conn, 0); - return; - - failed: - /* NB My QP has already established itself, so I handle anything going - * wrong here by setting ibc_comms_error. 
- * kiblnd_connreq_done(0) moves the conn state to ESTABLISHED, but then - * immediately tears it down. */ - - LASSERT (rc != 0); - conn->ibc_comms_error = rc; - kiblnd_connreq_done(conn, 0); -} - -int -kiblnd_active_connect (struct rdma_cm_id *cmid) -{ - kib_peer_t *peer = (kib_peer_t *)cmid->context; - kib_conn_t *conn; - kib_msg_t *msg; - struct rdma_conn_param cp; - int rc; - - conn = kiblnd_create_conn(peer, cmid, IBLND_CONN_ACTIVE_CONNECT); - if (conn == NULL) { - kiblnd_peer_connect_failed(peer, 1, -ENOMEM); - kiblnd_peer_decref(peer); /* lose cmid's ref */ - return -ENOMEM; - } - - /* conn "owns" cmid now, so I return success from here on to ensure the - * CM callback doesn't destroy cmid. conn also takes over cmid's ref - * on peer */ - - msg = &conn->ibc_connvars->cv_msg; - - memset(msg, 0, sizeof(*msg)); - kiblnd_init_msg(msg, IBLND_MSG_CONNREQ, sizeof(msg->ibm_u.connparams)); - msg->ibm_u.connparams.ibcp_queue_depth = IBLND_MSG_QUEUE_SIZE; - msg->ibm_u.connparams.ibcp_max_frags = IBLND_MAX_RDMA_FRAGS; - msg->ibm_u.connparams.ibcp_max_msg_size = IBLND_MSG_SIZE; - kiblnd_pack_msg(peer->ibp_ni, msg, 0, peer->ibp_nid, 0); - - memset(&cp, 0, sizeof(cp)); - cp.private_data = msg; - cp.private_data_len = msg->ibm_nob; - cp.responder_resources = 0; /* No atomic ops or RDMA reads */ - cp.initiator_depth = 0; - cp.flow_control = 1; - cp.retry_count = *kiblnd_tunables.kib_retry_count; - cp.rnr_retry_count = *kiblnd_tunables.kib_rnr_retry_count; - - LASSERT(cmid->context == (void *)conn); - LASSERT(conn->ibc_cmid == cmid); - - rc = rdma_connect(cmid, &cp); - if (rc != 0) { - CERROR("Can't connect to %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - kiblnd_connreq_done(conn, rc); - kiblnd_conn_decref(conn); - } - - return 0; -} - -int -kiblnd_cm_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) -{ - kib_peer_t *peer; - kib_conn_t *conn; - int rc; - - switch (event->event) { - default: - LBUG(); - - case RDMA_CM_EVENT_CONNECT_REQUEST: - /* destroy cmid 
on failure */ - rc = kiblnd_passive_connect(cmid, - event->private_data, - event->private_data_len); - CDEBUG(D_NET, "connreq: %d\n", rc); - return rc; - - case RDMA_CM_EVENT_ADDR_ERROR: - peer = (kib_peer_t *)cmid->context; - CDEBUG(D_NETERROR, "%s: ADDR ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); - kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc != 0 destroys cmid */ - - case RDMA_CM_EVENT_ADDR_RESOLVED: - peer = (kib_peer_t *)cmid->context; - - CDEBUG(D_NET,"%s Addr resolved: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - - if (event->status != 0) { - CDEBUG(D_NETERROR, "Can't resolve address for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - rc = event->status; - } else { - rc = rdma_resolve_route( - cmid, *kiblnd_tunables.kib_timeout * 1000); - if (rc == 0) - return 0; - /* Can't initiate route resolution */ - CERROR("Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), rc); - } - kiblnd_peer_connect_failed(peer, 1, rc); - kiblnd_peer_decref(peer); - return rc; /* rc != 0 destroys cmid */ - - case RDMA_CM_EVENT_ROUTE_ERROR: - peer = (kib_peer_t *)cmid->context; - CDEBUG(D_NETERROR, "%s: ROUTE ERROR %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, -EHOSTUNREACH); - kiblnd_peer_decref(peer); - return -EHOSTUNREACH; /* rc != 0 destroys cmid */ - - case RDMA_CM_EVENT_ROUTE_RESOLVED: - peer = (kib_peer_t *)cmid->context; - CDEBUG(D_NET,"%s Route resolved: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - - if (event->status == 0) - return kiblnd_active_connect(cmid); - - CDEBUG(D_NETERROR, "Can't resolve route for %s: %d\n", - libcfs_nid2str(peer->ibp_nid), event->status); - kiblnd_peer_connect_failed(peer, 1, event->status); - kiblnd_peer_decref(peer); - return event->status; /* rc != 0 destroys cmid */ - - case RDMA_CM_EVENT_UNREACHABLE: - conn = (kib_conn_t *)cmid->context; - 
LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CDEBUG(D_NETERROR, "%s: UNREACHABLE %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); - kiblnd_connreq_done(conn, -ENETDOWN); - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_CONNECT_ERROR: - conn = (kib_conn_t *)cmid->context; - LASSERT(conn->ibc_state == IBLND_CONN_ACTIVE_CONNECT || - conn->ibc_state == IBLND_CONN_PASSIVE_WAIT); - CDEBUG(D_NETERROR, "%s: CONNECT ERROR %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->status); - kiblnd_connreq_done(conn, -ENOTCONN); - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_REJECTED: - conn = (kib_conn_t *)cmid->context; - switch (conn->ibc_state) { - default: - LBUG(); - - case IBLND_CONN_PASSIVE_WAIT: - CERROR ("%s: REJECTED %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - event->status); - kiblnd_connreq_done(conn, -ECONNRESET); - break; - - case IBLND_CONN_ACTIVE_CONNECT: - kiblnd_rejected(conn, event->status, - event->private_data, - event->private_data_len); - break; - } - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_ESTABLISHED: - conn = (kib_conn_t *)cmid->context; - switch (conn->ibc_state) { - default: - LBUG(); - - case IBLND_CONN_PASSIVE_WAIT: - CDEBUG(D_NET, "ESTABLISHED (passive): %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_connreq_done(conn, 0); - break; - - case IBLND_CONN_ACTIVE_CONNECT: - CDEBUG(D_NET, "ESTABLISHED(active): %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_check_connreply(conn, - event->private_data, - event->private_data_len); - break; - } - /* net keeps its ref on conn! 
*/ - return 0; - - case RDMA_CM_EVENT_DISCONNECTED: - conn = (kib_conn_t *)cmid->context; - if (conn->ibc_state < IBLND_CONN_ESTABLISHED) { - CERROR("%s DISCONNECTED\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kiblnd_connreq_done(conn, -ECONNRESET); - } else { - kiblnd_close_conn(conn, 0); - } - kiblnd_conn_decref(conn); - return 0; - - case RDMA_CM_EVENT_DEVICE_REMOVAL: - LCONSOLE_ERROR("Received notification of device removal\n"); - LCONSOLE_ERROR("Please shutdown LNET to allow this to proceed\n"); - /* Can't remove network from underneath LNET for now, so I have - * to ignore this */ - return 0; - } -} - -int -kiblnd_check_txs (kib_conn_t *conn, struct list_head *txs) -{ - kib_tx_t *tx; - struct list_head *ttmp; - int timed_out = 0; - - spin_lock(&conn->ibc_lock); - - list_for_each (ttmp, txs) { - tx = list_entry (ttmp, kib_tx_t, tx_list); - - if (txs != &conn->ibc_active_txs) { - LASSERT (tx->tx_queued); - } else { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } - - if (time_after_eq (jiffies, tx->tx_deadline)) { - timed_out = 1; - break; - } - } - - spin_unlock(&conn->ibc_lock); - return timed_out; -} - -int -kiblnd_conn_timed_out (kib_conn_t *conn) -{ - return kiblnd_check_txs(conn, &conn->ibc_tx_queue) || - kiblnd_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || - kiblnd_check_txs(conn, &conn->ibc_tx_queue_nocred) || - kiblnd_check_txs(conn, &conn->ibc_active_txs); -} - -void -kiblnd_check_conns (int idx) -{ - struct list_head *peers = &kiblnd_data.kib_peers[idx]; - struct list_head *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - - again: - /* NB. We expect to have a look at all the peers and not find any - * rdmas to time out, so we just use a shared lock while we - * take a look... 
*/ - read_lock_irqsave(&kiblnd_data.kib_global_lock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - - list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - LASSERT (conn->ibc_state == IBLND_CONN_ESTABLISHED); - - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ - kiblnd_check_sends(conn); - - if (!kiblnd_conn_timed_out(conn)) - continue; - - /* Handle timeout by closing the whole connection. We - * can only be sure RDMA activity has ceased once the - * QP has been modified. */ - - kiblnd_conn_addref(conn); /* 1 ref for me... */ - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, - flags); - - CERROR("Timed out RDMA with %s\n", - libcfs_nid2str(peer->ibp_nid)); - - kiblnd_close_conn(conn, -ETIMEDOUT); - kiblnd_conn_decref(conn); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); -} - -void -kiblnd_disconnect_conn (kib_conn_t *conn) -{ - LASSERT (!in_interrupt()); - LASSERT (current == kiblnd_data.kib_connd); - LASSERT (conn->ibc_state == IBLND_CONN_CLOSING); - - rdma_disconnect(conn->ibc_cmid); - kiblnd_finalise_conn(conn); - - kiblnd_peer_notify(conn->ibc_peer); -} - -int -kiblnd_connd (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - kib_conn_t *conn; - int timeout; - int i; - int dropped_lock; - int peer_index = 0; - unsigned long deadline = jiffies; - - cfs_daemonize ("kiblnd_connd"); - cfs_block_allsigs (); - - init_waitqueue_entry (&wait, current); - kiblnd_data.kib_connd = current; - - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); - - while (!kiblnd_data.kib_shutdown) { - - dropped_lock = 0; - - if (!list_empty (&kiblnd_data.kib_connd_zombies)) { - conn = list_entry (kiblnd_data.kib_connd_zombies.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - 
spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags); - dropped_lock = 1; - - kiblnd_destroy_conn(conn); - - spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags); - } - - if (!list_empty (&kiblnd_data.kib_connd_conns)) { - conn = list_entry (kiblnd_data.kib_connd_conns.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags); - dropped_lock = 1; - - kiblnd_disconnect_conn(conn); - kiblnd_conn_decref(conn); - - spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags); - } - - /* careful with the jiffy wrap... */ - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kiblnd_data.kib_peer_hash_size; - - spin_unlock_irqrestore(&kiblnd_data.kib_connd_lock, flags); - dropped_lock = 1; - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
*/ - - if (*kiblnd_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kiblnd_tunables.kib_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kiblnd_check_conns(peer_index); - peer_index = (peer_index + 1) % - kiblnd_data.kib_peer_hash_size; - } - - deadline += p * HZ; - spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags); - } - - if (dropped_lock) - continue; - - /* Nothing to do for 'timeout' */ - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&kiblnd_data.kib_connd_waitq, &wait); - spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags); - - schedule_timeout (timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kiblnd_data.kib_connd_waitq, &wait); - spin_lock_irqsave (&kiblnd_data.kib_connd_lock, flags); - } - - spin_unlock_irqrestore (&kiblnd_data.kib_connd_lock, flags); - - kiblnd_thread_fini(); - return (0); -} - -void -kiblnd_qp_event(struct ib_event *event, void *arg) -{ - kib_conn_t *conn = arg; - - switch (event->event) { - case IB_EVENT_COMM_EST: - CDEBUG(D_NET, "%s established\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - - default: - CERROR("%s: Async QP event type %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event); - return; - } -} - -void -kiblnd_complete (struct ib_wc *wc) -{ - switch (kiblnd_wreqid2type(wc->wr_id)) { - default: - LBUG(); - - case IBLND_WID_RDMA: - /* We only get RDMA completion notification if it fails. All - * subsequent work items, including the final SEND will fail - * too. 
However we can't print out any more info about the - * failing RDMA because 'tx' might be back on the idle list or - * even reused already if we didn't manage to post all our work - * items */ - CDEBUG(D_NETERROR, "RDMA (tx: %p) failed: %d\n", - kiblnd_wreqid2ptr(wc->wr_id), wc->status); - return; - - case IBLND_WID_TX: - kiblnd_tx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status); - return; - - case IBLND_WID_RX: - kiblnd_rx_complete(kiblnd_wreqid2ptr(wc->wr_id), wc->status, - wc->byte_len); - return; - } -} - -void -kiblnd_cq_completion (struct ib_cq *cq, void *arg) -{ - /* NB I'm not allowed to schedule this conn once its refcount has - * reached 0. Since fundamentally I'm racing with scheduler threads - * consuming my CQ I could be called after all completions have - * occurred. But in this case, ibc_nrx == 0 && ibc_nsends_posted == 0 - * and this CQ is about to be destroyed so I NOOP. */ - kib_conn_t *conn = (kib_conn_t *)arg; - unsigned long flags; - - LASSERT (cq == conn->ibc_cq); - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - - conn->ibc_ready = 1; - - if (!conn->ibc_scheduled && - (conn->ibc_nrx > 0 || - conn->ibc_nsends_posted > 0)) { - kiblnd_conn_addref(conn); /* +1 ref for sched_conns */ - conn->ibc_scheduled = 1; - list_add_tail(&conn->ibc_sched_list, - &kiblnd_data.kib_sched_conns); - wake_up(&kiblnd_data.kib_sched_waitq); - } - - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); -} - -void -kiblnd_cq_event(struct ib_event *event, void *arg) -{ - kib_conn_t *conn = arg; - - CERROR("%s: async CQ event type %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), event->event); -} - -int -kiblnd_scheduler(void *arg) -{ - long id = (long)arg; - wait_queue_t wait; - char name[16]; - unsigned long flags; - kib_conn_t *conn; - struct ib_wc wc; - int rc; - int did_something; - int busy_loops = 0; - - snprintf(name, sizeof(name), "kiblnd_sd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); 
- - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - - while (!kiblnd_data.kib_shutdown) { - if (busy_loops++ >= IBLND_RESCHED) { - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, - flags); - - our_cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - } - - did_something = 0; - - if (!list_empty(&kiblnd_data.kib_sched_conns)) { - conn = list_entry(kiblnd_data.kib_sched_conns.next, - kib_conn_t, ibc_sched_list); - /* take over kib_sched_conns' ref on conn... */ - LASSERT(conn->ibc_scheduled); - list_del(&conn->ibc_sched_list); - conn->ibc_ready = 0; - - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, - flags); - - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - if (rc == 0) { - rc = ib_req_notify_cq(conn->ibc_cq, - IB_CQ_NEXT_COMP); - LASSERT (rc >= 0); - - rc = ib_poll_cq(conn->ibc_cq, 1, &wc); - } - - LASSERT (rc >= 0); - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, - flags); - - if (rc != 0 || conn->ibc_ready) { - /* There may be another completion waiting; get - * another scheduler to check while I handle - * this one... 
*/ - kiblnd_conn_addref(conn); /* +1 ref for sched_conns */ - list_add_tail(&conn->ibc_sched_list, - &kiblnd_data.kib_sched_conns); - wake_up(&kiblnd_data.kib_sched_waitq); - } else { - conn->ibc_scheduled = 0; - } - - if (rc != 0) { - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, - flags); - - kiblnd_complete(&wc); - - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, - flags); - } - - kiblnd_conn_decref(conn); /* ...drop my ref from above */ - did_something = 1; - } - - if (did_something) - continue; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kiblnd_data.kib_sched_waitq, &wait); - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); - - schedule(); - busy_loops = 0; - - remove_wait_queue(&kiblnd_data.kib_sched_waitq, &wait); - set_current_state(TASK_RUNNING); - spin_lock_irqsave(&kiblnd_data.kib_sched_lock, flags); - } - - spin_unlock_irqrestore(&kiblnd_data.kib_sched_lock, flags); - - kiblnd_thread_fini(); - return (0); -} diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/lnet/klnds/o2iblnd/o2iblnd_modparams.c deleted file mode 100644 index ef42ffec40c22b96e945ee3bd3165445cf78943f..0000000000000000000000000000000000000000 --- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c +++ /dev/null @@ -1,218 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "o2iblnd.h" - -static int service = 987; -CFS_MODULE_PARM(service, "i", int, 0444, - "service number (within RDMA_PS_TCP)"); - -static int cksum = 0; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of message descriptors"); - -static int credits = 64; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static char *ipif_name = "ib0"; -CFS_MODULE_PARM(ipif_name, "s", charp, 0444, - "IPoIB interface name"); - -static int retry_count = 5; -CFS_MODULE_PARM(retry_count, "i", int, 0644, - "Retransmissions when no ACK received"); - -static int rnr_retry_count = 6; -CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644, - "RNR retransmissions"); - -static int keepalive = 100; -CFS_MODULE_PARM(keepalive, "i", int, 0644, - "Idle time in seconds before sending a keepalive"); - -static int ib_mtu = 0; -CFS_MODULE_PARM(ib_mtu, "i", int, 0444, - "IB MTU 256/512/1024/2048/4096"); - -#if IBLND_MAP_ON_DEMAND -static int concurrent_sends = IBLND_RX_MSGS; -#else -static int concurrent_sends = IBLND_MSG_QUEUE_SIZE; -#endif -CFS_MODULE_PARM(concurrent_sends, "i", int, 0444, - "send work-queue sizing"); - -#if IBLND_MAP_ON_DEMAND -static int fmr_pool_size = 512; -CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444, - "size of the fmr pool (>= ntx)"); - -static int fmr_flush_trigger = 384; -CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444, - "# dirty FMRs that triggers pool flush"); - -static int fmr_cache = 1; -CFS_MODULE_PARM(fmr_cache, "i", int, 
0444, - "non-zero to enable FMR caching"); -#endif - -kib_tunables_t kiblnd_tunables = { - .kib_service = &service, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_keepalive = &keepalive, - .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peercredits = &peer_credits, - .kib_default_ipif = &ipif_name, - .kib_retry_count = &retry_count, - .kib_rnr_retry_count = &rnr_retry_count, - .kib_concurrent_sends = &concurrent_sends, - .kib_ib_mtu = &ib_mtu, -#if IBLND_MAP_ON_DEMAND - .kib_fmr_pool_size = &fmr_pool_size, - .kib_fmr_flush_trigger = &fmr_flush_trigger, - .kib_fmr_cache = &fmr_cache, -#endif -}; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - -static char ipif_basename_space[32]; - -static ctl_table kiblnd_ctl_table[] = { - {1, "service", &service, - sizeof(int), 0444, NULL, &proc_dointvec}, - {2, "cksum", &cksum, - sizeof(int), 0644, NULL, &proc_dointvec}, - {3, "timeout", &timeout, - sizeof(int), 0644, NULL, &proc_dointvec}, - {4, "ntx", &ntx, - sizeof(int), 0444, NULL, &proc_dointvec}, - {5, "credits", &credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {6, "peer_credits", &peer_credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {7, "ipif_name", ipif_basename_space, - sizeof(ipif_basename_space), 0444, NULL, &proc_dostring}, - {8, "retry_count", &retry_count, - sizeof(int), 0644, NULL, &proc_dointvec}, - {9, "rnr_retry_count", &rnr_retry_count, - sizeof(int), 0644, NULL, &proc_dointvec}, - {10, "keepalive", &keepalive, - sizeof(int), 0644, NULL, &proc_dointvec}, - {11, "concurrent_sends", &concurrent_sends, - sizeof(int), 0644, NULL, &proc_dointvec}, - {12, "ib_mtu", &ib_mtu, - sizeof(int), 0444, NULL, &proc_dointvec}, -#if IBLND_MAP_ON_DEMAND - {12, "fmr_pool_size", &fmr_pool_size, - sizeof(int), 0444, NULL, &proc_dointvec}, - {13, "fmr_flush_trigger", &fmr_flush_trigger, - sizeof(int), 0444, NULL, &proc_dointvec}, - {14, "fmr_cache", &fmr_cache, - sizeof(int), 0444, NULL, &proc_dointvec}, -#endif - {0} -}; - -static ctl_table 
kiblnd_top_ctl_table[] = { - {203, "o2iblnd", NULL, 0, 0555, kiblnd_ctl_table}, - {0} -}; - -void -kiblnd_initstrtunable(char *space, char *str, int size) -{ - strncpy(space, str, size); - space[size-1] = 0; -} - -void -kiblnd_sysctl_init (void) -{ - kiblnd_initstrtunable(ipif_basename_space, ipif_name, - sizeof(ipif_basename_space)); - - kiblnd_tunables.kib_sysctl = - register_sysctl_table(kiblnd_top_ctl_table, 0); - - if (kiblnd_tunables.kib_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); -} - -void -kiblnd_sysctl_fini (void) -{ - if (kiblnd_tunables.kib_sysctl != NULL) - unregister_sysctl_table(kiblnd_tunables.kib_sysctl); -} - -#else - -void -kiblnd_sysctl_init (void) -{ -} - -void -kiblnd_sysctl_fini (void) -{ -} - -#endif - -int -kiblnd_tunables_init (void) -{ - kiblnd_sysctl_init(); - - if (*kiblnd_tunables.kib_concurrent_sends > IBLND_RX_MSGS) - *kiblnd_tunables.kib_concurrent_sends = IBLND_RX_MSGS; - if (*kiblnd_tunables.kib_concurrent_sends < IBLND_MSG_QUEUE_SIZE) - *kiblnd_tunables.kib_concurrent_sends = IBLND_MSG_QUEUE_SIZE; - - return 0; -} - -void -kiblnd_tunables_fini (void) -{ - kiblnd_sysctl_fini(); -} - - - diff --git a/lnet/klnds/openiblnd/.cvsignore b/lnet/klnds/openiblnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/openiblnd/Makefile.in b/lnet/klnds/openiblnd/Makefile.in deleted file mode 100644 index 86fa9cd37b94dba5c1657f0614e4f0b1d13e75f7..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kopeniblnd -kopeniblnd-objs := openiblnd.o openiblnd_cb.o openiblnd_modparams.o - -EXTRA_POST_CFLAGS := @OPENIBCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/openiblnd/autoMakefile.am 
b/lnet/klnds/openiblnd/autoMakefile.am deleted file mode 100644 index b4e0fb70aeef8fef8953406350cc3be79936992a..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -if MODULES -if BUILD_OPENIBLND -modulenet_DATA = kopeniblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kopeniblnd-objs:%.o=%.c) openiblnd.h diff --git a/lnet/klnds/openiblnd/openiblnd.c b/lnet/klnds/openiblnd/openiblnd.c deleted file mode 100644 index 4219005f96046524846ccb78939469fdbb52bd5b..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/openiblnd.c +++ /dev/null @@ -1,1889 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "openiblnd.h" - -lnd_t the_kiblnd = { -#ifdef USING_TSAPI - .lnd_type = CIBLND, -#else - .lnd_type = OPENIBLND, -#endif - .lnd_startup = kibnal_startup, - .lnd_shutdown = kibnal_shutdown, - .lnd_ctl = kibnal_ctl, - .lnd_send = kibnal_send, - .lnd_recv = kibnal_recv, - .lnd_eager_recv = kibnal_eager_recv, - .lnd_accept = kibnal_accept, -}; - -kib_data_t kibnal_data; - -__u32 -kibnal_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 1 : sum; -} - -void -kibnal_init_msg(kib_msg_t *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob; -} - -void -kibnal_pack_msg(kib_msg_t *msg, int version, int credits, - lnet_nid_t dstnid, __u64 dststamp) -{ - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ - msg->ibm_magic = IBNAL_MSG_MAGIC; - msg->ibm_version = version; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid, - dstnid); - msg->ibm_srcstamp = kibnal_data.kib_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - - if (*kibnal_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob); - } -} - -int -kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob) -{ - const int hdr_size = offsetof(kib_msg_t, ibm_u); - __u32 msg_cksum; - int msg_version; - int flip; - int msg_nob; - - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - if (msg->ibm_magic == IBNAL_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - msg_version = flip ? 
__swab16(msg->ibm_version) : msg->ibm_version; - if ((expected_version == 0) ? - (msg_version != IBNAL_MSG_VERSION && - msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) : - (msg_version != expected_version)) { - CERROR("Bad version: %x\n", msg_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kibnal_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - msg->ibm_version = msg_version; - LASSERT (sizeof(msg->ibm_type) == 1); - LASSERT (sizeof(msg->ibm_credits) == 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBNAL_MSG_SVCQRY: - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_SVCRSP: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.svcrsp)) { - CERROR("Short SVCRSP: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.svcrsp))); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->ibm_u.svcrsp.ibsr_svc_id); - __swab16s(&msg->ibm_u.svcrsp.ibsr_svc_pkey); - } - break; - - case IBNAL_MSG_CONNREQ: - case IBNAL_MSG_CONNACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) { - CERROR("Short CONNREQ: %d(%d)\n", msg_nob, - 
(int)(hdr_size + sizeof(msg->ibm_u.connparams))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth); - break; - - case IBNAL_MSG_IMMEDIATE: - if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) { - CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_RDMA: - case IBNAL_MSG_GET_RDMA: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.rdma)) { - CERROR("Short RDMA req: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.rdma))); - return -EPROTO; - } - if (flip) { - __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key); - __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob); - __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr); - } - break; - - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) { - CERROR("Short RDMA completion: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.completion))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; - } - return 0; -} - -int -kibnal_make_svcqry (kib_conn_t *conn) -{ - kib_peer_t *peer = conn->ibc_peer; - int version = IBNAL_MSG_VERSION; - int msg_version; - kib_msg_t *msg; - struct socket *sock; - int rc; - int nob; - - LASSERT (conn->ibc_connreq != NULL); - msg = &conn->ibc_connreq->cr_msg; - - again: - kibnal_init_msg(msg, IBNAL_MSG_SVCQRY, 0); - kibnal_pack_msg(msg, version, 0, peer->ibp_nid, 0); - - rc = lnet_connect(&sock, peer->ibp_nid, - 0, peer->ibp_ip, peer->ibp_port); - if (rc != 0) - return -ECONNABORTED; - - rc = libcfs_sock_write(sock, msg, msg->ibm_nob, - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d sending svcqry to %s at %u.%u.%u.%u/%d\n", - rc, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - goto out; - } - - /* The first 6 bytes are invariably MAGIC + proto version */ - rc = libcfs_sock_read(sock, msg, 6, 
*kibnal_tunables.kib_timeout); - if (rc != 0) { - CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n", - rc, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - goto out; - } - - if (msg->ibm_magic != IBNAL_MSG_MAGIC && - msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) { - CERROR("Bad magic: %08x from %s at %u.%u.%u.%u/%d\n", - msg->ibm_magic, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - rc = -EPROTO; - goto out; - } - - msg_version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ? - msg->ibm_version : __swab16(msg->ibm_version); - if (msg_version != version) { - if (version == IBNAL_MSG_VERSION) { - /* retry with previous version */ - libcfs_sock_release(sock); - version = IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD; - goto again; - } - - CERROR("Bad version %x from %s at %u.%u.%u.%u/%d\n", - msg_version, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - rc = -EPROTO; - goto out; - } - - /* Read in the rest of the message now we know the expected format */ - nob = offsetof(kib_msg_t, ibm_u) + sizeof(kib_svcrsp_t); - rc = libcfs_sock_read(sock, ((char *)msg) + 6, nob - 6, - *kibnal_tunables.kib_timeout); - if (rc != 0) { - CERROR("Error %d receiving svcrsp from %s at %u.%u.%u.%u/%d\n", - rc, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - goto out; - } - - rc = kibnal_unpack_msg(msg, version, nob); - if (rc != 0) { - CERROR("Error %d unpacking svcrsp from %s at %u.%u.%u.%u/%d\n", - rc, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - goto out; - } - - if (msg->ibm_type != IBNAL_MSG_SVCRSP) { - CERROR("Unexpected response type %d from %s at %u.%u.%u.%u/%d\n", - msg->ibm_type, libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - rc = -EPROTO; - goto out; - } - - if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR("Unexpected dst NID/stamp 
%s/"LPX64" from " - "%s at %u.%u.%u.%u/%d\n", - libcfs_nid2str(msg->ibm_dstnid), msg->ibm_dststamp, - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), - peer->ibp_port); - rc = -EPROTO; - goto out; - } - - if (!lnet_ptlcompat_matchnid(peer->ibp_nid, msg->ibm_srcnid)) { - CERROR("Unexpected src NID %s from %s at %u.%u.%u.%u/%d\n", - libcfs_nid2str(msg->ibm_srcnid), - libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), peer->ibp_port); - rc = -EPROTO; - goto out; - } - - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_connreq->cr_svcrsp = msg->ibm_u.svcrsp; - conn->ibc_version = version; - - out: - libcfs_sock_release(sock); - return rc; -} - -void -kibnal_handle_svcqry (struct socket *sock) -{ - __u32 peer_ip; - unsigned int peer_port; - kib_msg_t *msg; - __u64 srcnid; - __u64 srcstamp; - int version; - int reject = 0; - int rc; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - if (rc != 0) { - CERROR("Can't get peer's IP: %d\n", rc); - return; - } - - LIBCFS_ALLOC(msg, sizeof(*msg)); - if (msg == NULL) { - CERROR("Can't allocate msgs for %u.%u.%u.%u/%d\n", - HIPQUAD(peer_ip), peer_port); - return; - } - - rc = libcfs_sock_read(sock, &msg->ibm_magic, sizeof(msg->ibm_magic), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d receiving svcqry(1) from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - if (msg->ibm_magic != IBNAL_MSG_MAGIC && - msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) { - /* Unexpected magic! */ - if (the_lnet.ln_ptlcompat == 0) { - if (msg->ibm_magic == LNET_PROTO_MAGIC || - msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) { - /* future protocol version compatibility! - * When LNET unifies protocols over all LNDs, - * the first thing sent will be a version - * query. 
I send back a reply in my current - * protocol to tell her I'm "old" */ - kibnal_init_msg(msg, 0, 0); - kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0, - LNET_NID_ANY, 0); - reject = 1; - goto reply; - } - - CERROR ("Bad magic(1) %#08x (%#08x expected) from " - "%u.%u.%u.%u/%d\n", msg->ibm_magic, - IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port); - goto out; - } - - /* When portals compatibility is set, I may be passed a new - * connection "blindly" by the acceptor, and I have to - * determine if my peer has sent an acceptor connection request - * or not. */ - rc = lnet_accept(kibnal_data.kib_ni, sock, msg->ibm_magic); - if (rc != 0) - goto out; - - /* It was an acceptor connection request! - * Now I should see my magic... */ - rc = libcfs_sock_read(sock, &msg->ibm_magic, - sizeof(msg->ibm_magic), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d receiving svcqry(2) from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - if (msg->ibm_magic != IBNAL_MSG_MAGIC && - msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) { - CERROR ("Bad magic(2) %#08x (%#08x expected) from " - "%u.%u.%u.%u/%d\n", msg->ibm_magic, - IBNAL_MSG_MAGIC, HIPQUAD(peer_ip), peer_port); - goto out; - } - } - - /* Now check version */ - - rc = libcfs_sock_read(sock, &msg->ibm_version, sizeof(msg->ibm_version), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d receiving svcqry(3) from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - version = (msg->ibm_magic == IBNAL_MSG_MAGIC) ? 
- msg->ibm_version : __swab32(msg->ibm_version); - /* Peer is a different protocol version: reply in my current protocol - * to tell her I'm "old" */ - if (version != IBNAL_MSG_VERSION && - version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - kibnal_init_msg(msg, 0, 0); - kibnal_pack_msg(msg, IBNAL_MSG_VERSION, 0, LNET_NID_ANY, 0); - reject = 1; - goto reply; - } - - /* Now read in all the rest */ - rc = libcfs_sock_read(sock, &msg->ibm_type, - offsetof(kib_msg_t, ibm_u) - - offsetof(kib_msg_t, ibm_type), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Error %d receiving svcqry(4) from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - rc = kibnal_unpack_msg(msg, version, offsetof(kib_msg_t, ibm_u)); - if (rc != 0) { - CERROR("Error %d unpacking svcqry from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; - } - - if (msg->ibm_type != IBNAL_MSG_SVCQRY) { - CERROR("Unexpected message %d from %u.%u.%u.%u/%d\n", - msg->ibm_type, HIPQUAD(peer_ip), peer_port); - goto out; - } - - if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid)) { - CERROR("Unexpected dstnid %s: expected %s from %u.%u.%u.%u/%d\n", - libcfs_nid2str(msg->ibm_dstnid), - libcfs_nid2str(kibnal_data.kib_ni->ni_nid), - HIPQUAD(peer_ip), peer_port); - goto out; - } - - srcnid = msg->ibm_srcnid; - srcstamp = msg->ibm_srcstamp; - - kibnal_init_msg(msg, IBNAL_MSG_SVCRSP, sizeof(msg->ibm_u.svcrsp)); - - msg->ibm_u.svcrsp.ibsr_svc_id = kibnal_data.kib_svc_id; - memcpy(msg->ibm_u.svcrsp.ibsr_svc_gid, kibnal_data.kib_svc_gid, - sizeof(kibnal_data.kib_svc_gid)); - msg->ibm_u.svcrsp.ibsr_svc_pkey = kibnal_data.kib_svc_pkey; - - kibnal_pack_msg(msg, version, 0, srcnid, srcstamp); - - reply: - rc = libcfs_sock_write (sock, msg, msg->ibm_nob, - lnet_acceptor_timeout()); - if (!reject && rc != 0) { - /* Only complain if we're not rejecting */ - CERROR("Error %d replying to svcqry from %u.%u.%u.%u/%d\n", - rc, HIPQUAD(peer_ip), peer_port); - goto out; 
- } - - out: - LIBCFS_FREE(msg, sizeof(*msg)); -} - -void -kibnal_free_acceptsock (kib_acceptsock_t *as) -{ - libcfs_sock_release(as->ibas_sock); - LIBCFS_FREE(as, sizeof(*as)); -} - -int -kibnal_accept(lnet_ni_t *ni, struct socket *sock) -{ - kib_acceptsock_t *as; - unsigned long flags; - - LIBCFS_ALLOC(as, sizeof(*as)); - if (as == NULL) { - CERROR("Out of Memory\n"); - return -ENOMEM; - } - - as->ibas_sock = sock; - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail(&as->ibas_list, &kibnal_data.kib_connd_acceptq); - wake_up(&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); - return 0; -} - -int -kibnal_start_ib_listener (void) -{ - int rc; - - LASSERT (kibnal_data.kib_listen_handle == NULL); - - kibnal_data.kib_svc_id = ib_cm_service_assign(); - CDEBUG(D_NET, "svc id "LPX64"\n", kibnal_data.kib_svc_id); - - rc = ib_cached_gid_get(kibnal_data.kib_device, - kibnal_data.kib_port, 0, - kibnal_data.kib_svc_gid); - if (rc != 0) { - CERROR("Can't get port %d GID: %d\n", - kibnal_data.kib_port, rc); - return rc; - } - - rc = ib_cached_pkey_get(kibnal_data.kib_device, - kibnal_data.kib_port, 0, - &kibnal_data.kib_svc_pkey); - if (rc != 0) { - CERROR ("Can't get port %d PKEY: %d\n", - kibnal_data.kib_port, rc); - return rc; - } - - rc = ib_cm_listen(kibnal_data.kib_svc_id, - TS_IB_CM_SERVICE_EXACT_MASK, - kibnal_passive_conn_callback, NULL, - &kibnal_data.kib_listen_handle); - if (rc != 0) { - kibnal_data.kib_listen_handle = NULL; - CERROR ("Can't create IB listener: %d\n", rc); - return rc; - } - - LASSERT (kibnal_data.kib_listen_handle != NULL); - return 0; -} - -void -kibnal_stop_ib_listener (void) -{ - int rc; - - LASSERT (kibnal_data.kib_listen_handle != NULL); - - rc = ib_cm_listen_stop (kibnal_data.kib_listen_handle); - if (rc != 0) - CERROR("Error stopping IB listener: %d\n", rc); - - kibnal_data.kib_listen_handle = NULL; -} - -int -kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid) -{ - 
kib_peer_t *peer; - unsigned long flags; - int rc; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, sizeof (*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->ibp_nid = nid; - atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD (&peer->ibp_conns); - INIT_LIST_HEAD (&peer->ibp_tx_queue); - INIT_LIST_HEAD (&peer->ibp_connd_list); /* not queued for connecting */ - - peer->ibp_error = 0; - peer->ibp_last_alive = cfs_time_current(); - peer->ibp_reconnect_interval = 0; /* OK to connect at any time */ - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (atomic_read(&kibnal_data.kib_npeers) >= - *kibnal_tunables.kib_concurrent_peers) { - rc = -EOVERFLOW; /* !! but at least it distinguishes */ - } else if (kibnal_data.kib_nonewpeers) { - rc = -ESHUTDOWN; /* shutdown has started */ - } else { - rc = 0; - /* npeers only grows with kib_global_lock held */ - atomic_inc(&kibnal_data.kib_npeers); - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (rc != 0) { - CERROR("Can't create peer: %s\n", - (rc == -ESHUTDOWN) ? 
"shutting down" : - "too many peers"); - LIBCFS_FREE(peer, sizeof(*peer)); - } else { - *peerp = peer; - } - - return rc; -} - -void -kibnal_destroy_peer (kib_peer_t *peer) -{ - CDEBUG (D_NET, "peer %s %p deleted\n", - libcfs_nid2str(peer->ibp_nid), peer); - - LASSERT (atomic_read (&peer->ibp_refcount) == 0); - LASSERT (peer->ibp_persistence == 0); - LASSERT (!kibnal_peer_active(peer)); - LASSERT (peer->ibp_connecting == 0); - LASSERT (peer->ibp_accepting == 0); - LASSERT (list_empty (&peer->ibp_connd_list)); - LASSERT (list_empty (&peer->ibp_conns)); - LASSERT (list_empty (&peer->ibp_tx_queue)); - - LIBCFS_FREE (peer, sizeof (*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. */ - atomic_dec(&kibnal_data.kib_npeers); -} - -kib_peer_t * -kibnal_find_peer_locked (lnet_nid_t nid) -{ - struct list_head *peer_list = kibnal_nid2peerlist (nid); - struct list_head *tmp; - kib_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, kib_peer_t, ibp_list); - - LASSERT (peer->ibp_persistence != 0 || /* persistent peer */ - peer->ibp_connecting != 0 || /* creating conns */ - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); /* active conn */ - - if (peer->ibp_nid != nid) - continue; - - return (peer); - } - return (NULL); -} - -kib_peer_t * -kibnal_get_peer (lnet_nid_t nid) -{ - kib_peer_t *peer; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) /* +1 ref for caller? 
*/ - kibnal_peer_addref(peer); - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - return (peer); -} - -void -kibnal_unlink_peer_locked (kib_peer_t *peer) -{ - LASSERT (peer->ibp_persistence == 0); - LASSERT (list_empty(&peer->ibp_conns)); - - LASSERT (kibnal_peer_active(peer)); - list_del_init (&peer->ibp_list); - /* lose peerlist's ref */ - kibnal_peer_decref(peer); -} - -int -kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp, - int *persistencep) -{ - kib_peer_t *peer; - struct list_head *ptmp; - unsigned long flags; - int i; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *ipp = peer->ibp_ip; - *portp = peer->ibp_port; - *persistencep = peer->ibp_persistence; - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (0); - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (-ENOENT); -} - -int -kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port) -{ - unsigned long flags; - kib_peer_t *peer; - kib_peer_t *peer2; - int rc; - - if (nid == LNET_NID_ANY) - return (-EINVAL); - - rc = kibnal_create_peer (&peer, nid); - if (rc != 0) - return rc; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - peer2 = kibnal_find_peer_locked (nid); - if (peer2 != NULL) { - kibnal_peer_decref(peer); - peer = peer2; - } else { - /* peer table takes existing ref on peer */ - list_add_tail (&peer->ibp_list, - kibnal_nid2peerlist (nid)); - } - - peer->ibp_ip = ip; - peer->ibp_port = port; - peer->ibp_persistence++; - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return (0); -} - -void 
-kibnal_del_peer_locked (kib_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kib_conn_t *conn; - - peer->ibp_persistence = 0; - - if (list_empty(&peer->ibp_conns)) { - kibnal_unlink_peer_locked(peer); - } else { - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - kibnal_close_conn_locked (conn, 0); - } - /* NB peer is no longer persistent; closing its last conn - * unlinked it. */ - } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. */ -} - -int -kibnal_del_peer (lnet_nid_t nid) -{ - unsigned long flags; - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - kib_peer_t *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT (list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, &zombies); - } - - kibnal_del_peer_locked (peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_txlist_done(&zombies, -EIO); - - return (rc); -} - -kib_conn_t * -kibnal_get_conn_by_idx (int index) -{ - kib_peer_t *peer; - struct list_head *ptmp; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - int i; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - 
list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence > 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - list_for_each (ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry (ctmp, kib_conn_t, ibc_list); - kibnal_conn_addref(conn); - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (conn); - } - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (NULL); -} - -kib_conn_t * -kibnal_create_conn (void) -{ - kib_conn_t *conn; - int i; - __u64 vaddr = 0; - __u64 vaddr_base; - int page_offset; - int ipage; - int rc; - union { - struct ib_qp_create_param qp_create; - struct ib_qp_attribute qp_attr; - } params; - - LIBCFS_ALLOC (conn, sizeof (*conn)); - if (conn == NULL) { - CERROR ("Can't allocate connection\n"); - return (NULL); - } - - /* zero flags, NULL pointers etc... */ - memset (conn, 0, sizeof (*conn)); - - INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD (&conn->ibc_tx_queue); - INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD (&conn->ibc_active_txs); - spin_lock_init (&conn->ibc_lock); - - atomic_inc (&kibnal_data.kib_nconns); - /* well not really, but I call destroy() on failure, which decrements */ - - LIBCFS_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t)); - if (conn->ibc_rxs == NULL) - goto failed; - memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - rc = kibnal_alloc_pages(&conn->ibc_rx_pages, - IBNAL_RX_MSG_PAGES, - IB_ACCESS_LOCAL_WRITE); - if (rc != 0) - goto failed; - - vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr; - - for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; - kib_rx_t *rx = &conn->ibc_rxs[i]; - - rx->rx_conn = conn; - rx->rx_vaddr = vaddr; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset); 
- - vaddr += IBNAL_MSG_SIZE; - LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES); - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_RX_MSG_PAGES); - } - } - - /* We can post up to IBLND_MSG_QUEUE_SIZE immediate/req messages and - * the same # of ack/nak/rdma+done messages */ - - params.qp_create = (struct ib_qp_create_param) { - .limit = { - .max_outstanding_send_request = 3 * IBNAL_MSG_QUEUE_SIZE, - .max_outstanding_receive_request = IBNAL_RX_MSGS, - .max_send_gather_element = 1, - .max_receive_scatter_element = 1, - }, - .pd = kibnal_data.kib_pd, - .send_queue = kibnal_data.kib_cq, - .receive_queue = kibnal_data.kib_cq, - .send_policy = IB_WQ_SIGNAL_SELECTABLE, - .receive_policy = IB_WQ_SIGNAL_SELECTABLE, - .rd_domain = 0, - .transport = IB_TRANSPORT_RC, - .device_specific = NULL, - }; - - rc = ib_qp_create (¶ms.qp_create, &conn->ibc_qp, &conn->ibc_qpn); - if (rc != 0) { - CERROR ("Failed to create queue pair: %d\n", rc); - goto failed; - } - - /* Mark QP created */ - conn->ibc_state = IBNAL_CONN_INIT_QP; - - params.qp_attr = (struct ib_qp_attribute) { - .state = IB_QP_STATE_INIT, - .port = kibnal_data.kib_port, - .enable_rdma_read = 1, - .enable_rdma_write = 1, - .valid_fields = (IB_QP_ATTRIBUTE_STATE | - IB_QP_ATTRIBUTE_PORT | - IB_QP_ATTRIBUTE_PKEY_INDEX | - IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE), - }; - rc = ib_qp_modify(conn->ibc_qp, ¶ms.qp_attr); - if (rc != 0) { - CERROR ("Failed to modify queue pair: %d\n", rc); - goto failed; - } - - /* 1 ref for caller */ - atomic_set (&conn->ibc_refcount, 1); - return (conn); - - failed: - kibnal_destroy_conn (conn); - return (NULL); -} - -void -kibnal_destroy_conn (kib_conn_t *conn) -{ - int rc; - - CDEBUG (D_NET, "connection %p\n", conn); - - LASSERT (atomic_read (&conn->ibc_refcount) == 0); - LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT 
(list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT (list_empty(&conn->ibc_active_txs)); - LASSERT (conn->ibc_nsends_posted == 0); - LASSERT (conn->ibc_connreq == NULL); - - switch (conn->ibc_state) { - case IBNAL_CONN_ZOMBIE: - /* called after connection sequence initiated */ - - case IBNAL_CONN_INIT_QP: - rc = ib_qp_destroy(conn->ibc_qp); - if (rc != 0) - CERROR("Can't destroy QP: %d\n", rc); - /* fall through */ - - case IBNAL_CONN_INIT_NOTHING: - break; - - default: - LASSERT (0); - } - - if (conn->ibc_rx_pages != NULL) - kibnal_free_pages(conn->ibc_rx_pages); - - if (conn->ibc_rxs != NULL) - LIBCFS_FREE(conn->ibc_rxs, - IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - if (conn->ibc_peer != NULL) - kibnal_peer_decref(conn->ibc_peer); - - LIBCFS_FREE(conn, sizeof (*conn)); - - atomic_dec(&kibnal_data.kib_nconns); - - if (atomic_read (&kibnal_data.kib_nconns) == 0 && - kibnal_data.kib_shutdown) { - /* I just nuked the last connection on shutdown; wake up - * everyone so they can exit. */ - wake_up_all(&kibnal_data.kib_sched_waitq); - wake_up_all(&kibnal_data.kib_reaper_waitq); - } -} - -int -kibnal_close_peer_conns_locked (kib_peer_t *peer, int why) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - count++; - kibnal_close_conn_locked (conn, why); - } - - return (count); -} - -int -kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - if (conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn %p nid: %s" - " incarnation:"LPX64"("LPX64")\n", conn, - libcfs_nid2str(peer->ibp_nid), - conn->ibc_incarnation, incarnation); - - count++; - kibnal_close_conn_locked (conn, -ESTALE); - } - 
- return (count); -} - -int -kibnal_close_matching_conns (lnet_nid_t nid) -{ - unsigned long flags; - kib_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kibnal_close_peer_conns_locked (peer, 0); - } - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return (0); - - return (count == 0 ? 
-ENOENT : 0); -} - -int -kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - LASSERT (ni == kibnal_data.kib_ni); - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - __u32 ip = 0; - int port = 0; - int share_count = 0; - - rc = kibnal_get_peer_info(data->ioc_count, - &nid, &ip, &port, &share_count); - data->ioc_nid = nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - break; - } - case IOC_LIBCFS_ADD_PEER: { - rc = kibnal_add_persistent_peer (data->ioc_nid, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ - break; - } - case IOC_LIBCFS_DEL_PEER: { - rc = kibnal_del_peer (data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count); - - if (conn == NULL) - rc = -ENOENT; - else { - rc = 0; - data->ioc_nid = conn->ibc_peer->ibp_nid; - kibnal_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kibnal_close_matching_conns (data->ioc_nid); - break; - } - case IOC_LIBCFS_REGISTER_MYNID: { - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) { - rc = 0; - } else { - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - rc = -EINVAL; - } - break; - } - } - - return rc; -} - -void -kibnal_free_pages (kib_pages_t *p) -{ - int npages = p->ibp_npages; - int rc; - int i; - - if (p->ibp_mapped) { - rc = ib_memory_deregister(p->ibp_handle); - if (rc != 0) - CERROR ("Deregister error: %d\n", rc); - } - - for (i = 0; i < npages; i++) - if (p->ibp_pages[i] != NULL) - __free_page(p->ibp_pages[i]); - - LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); -} - -int -kibnal_alloc_pages (kib_pages_t **pp, int npages, int access) -{ - kib_pages_t *p; - struct ib_physical_buffer *phys_pages; - int i; - int rc; - - LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages])); - if 
(p == NULL) { - CERROR ("Can't allocate buffer %d\n", npages); - return (-ENOMEM); - } - - memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages])); - p->ibp_npages = npages; - - for (i = 0; i < npages; i++) { - p->ibp_pages[i] = alloc_page (GFP_KERNEL); - if (p->ibp_pages[i] == NULL) { - CERROR ("Can't allocate page %d of %d\n", i, npages); - kibnal_free_pages(p); - return (-ENOMEM); - } - } - - LIBCFS_ALLOC(phys_pages, npages * sizeof(*phys_pages)); - if (phys_pages == NULL) { - CERROR ("Can't allocate physarray for %d pages\n", npages); - kibnal_free_pages(p); - return (-ENOMEM); - } - - for (i = 0; i < npages; i++) { - phys_pages[i].size = PAGE_SIZE; - phys_pages[i].address = - lnet_page2phys(p->ibp_pages[i]); - } - - p->ibp_vaddr = 0; - rc = ib_memory_register_physical(kibnal_data.kib_pd, - phys_pages, npages, - &p->ibp_vaddr, - npages * PAGE_SIZE, 0, - access, - &p->ibp_handle, - &p->ibp_lkey, - &p->ibp_rkey); - - LIBCFS_FREE(phys_pages, npages * sizeof(*phys_pages)); - - if (rc != 0) { - CERROR ("Error %d mapping %d pages\n", rc, npages); - kibnal_free_pages(p); - return (rc); - } - - p->ibp_mapped = 1; - *pp = p; - return (0); -} - -int -kibnal_setup_tx_descs (void) -{ - int ipage = 0; - int page_offset = 0; - __u64 vaddr; - __u64 vaddr_base; - struct page *page; - kib_tx_t *tx; - int i; - int rc; - - /* pre-mapped messages are not bigger than 1 page */ - LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer calculations */ - LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0); - - rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, - IBNAL_TX_MSG_PAGES(), - 0); /* local read access only */ - if (rc != 0) - return (rc); - - vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr; - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - page = kibnal_data.kib_tx_pages->ibp_pages[ipage]; - tx = &kibnal_data.kib_tx_descs[i]; - - memset (tx, 0, sizeof(*tx)); /* zero flags etc */ - - tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + 
page_offset); - tx->tx_vaddr = vaddr; - tx->tx_mapped = KIB_TX_UNMAPPED; - - CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n", - i, tx, tx->tx_msg, tx->tx_vaddr); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - vaddr += IBNAL_MSG_SIZE; - LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES()); - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_TX_MSG_PAGES()); - } - } - - return (0); -} - -void -kibnal_shutdown (lnet_ni_t *ni) -{ - int i; - int rc; - unsigned long flags; - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - LASSERT(ni == kibnal_data.kib_ni); - LASSERT(ni->ni_data == &kibnal_data); - - switch (kibnal_data.kib_init) { - default: - CERROR ("Unexpected state %d\n", kibnal_data.kib_init); - LBUG(); - - case IBNAL_INIT_ALL: - /* Prevent new peers from being created */ - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - kibnal_data.kib_nonewpeers = 1; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_stop_ib_listener(); - - /* Remove all existing peers from the peer table */ - kibnal_del_peer(LNET_NID_ANY); - - /* Wait for pending conn reqs to be handled */ - i = 2; - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - while (!list_empty(&kibnal_data.kib_connd_acceptq)) { - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, - flags); - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */ - "waiting for conn reqs to clean up\n"); - cfs_pause(cfs_time_seconds(1)); - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - } - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read(&kibnal_data.kib_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "waiting for %d peers to close down\n", - atomic_read(&kibnal_data.kib_npeers)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case IBNAL_INIT_CQ: - rc = ib_cq_destroy (kibnal_data.kib_cq); - if (rc != 0) - CERROR ("Destroy CQ error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_TXD: - kibnal_free_pages (kibnal_data.kib_tx_pages); - /* fall through */ -#if IBNAL_FMR - case IBNAL_INIT_FMR: - rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool); - if (rc != 0) - CERROR ("Destroy FMR pool error: %d\n", rc); - /* fall through */ -#endif - case IBNAL_INIT_PD: - rc = ib_pd_destroy(kibnal_data.kib_pd); - if (rc != 0) - CERROR ("Destroy PD error: %d\n", rc); - /* fall through */ - - case IBNAL_INIT_DATA: - /* Module refcount only gets to zero when all peers - * have been closed so all lists must be empty */ - LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0); - LASSERT (kibnal_data.kib_peers != NULL); - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - LASSERT (list_empty (&kibnal_data.kib_peers[i])); - } - LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0); - LASSERT (list_empty (&kibnal_data.kib_sched_rxq)); - LASSERT (list_empty (&kibnal_data.kib_sched_txq)); - LASSERT (list_empty (&kibnal_data.kib_reaper_conns)); - LASSERT (list_empty (&kibnal_data.kib_connd_peers)); - LASSERT (list_empty (&kibnal_data.kib_connd_acceptq)); - - /* flag threads to terminate; wake and wait for them to die */ - kibnal_data.kib_shutdown = 1; - wake_up_all (&kibnal_data.kib_sched_waitq); - wake_up_all (&kibnal_data.kib_reaper_waitq); - wake_up_all (&kibnal_data.kib_connd_waitq); - - i = 2; - while (atomic_read (&kibnal_data.kib_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "Waiting for %d threads to terminate\n", - atomic_read (&kibnal_data.kib_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case IBNAL_INIT_NOTHING: - break; - } - - if (kibnal_data.kib_tx_descs != NULL) - LIBCFS_FREE (kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - - if (kibnal_data.kib_peers != NULL) - LIBCFS_FREE (kibnal_data.kib_peers, - sizeof (struct list_head) * - kibnal_data.kib_peer_hash_size); - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - kibnal_data.kib_init = IBNAL_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -int -kibnal_get_ipoibidx(void) -{ - /* NB single threaded! */ - static struct ib_port_properties port_props; - - int ipoibidx = 0; - int devidx; - int port; - int rc; - struct ib_device *device; - - for (devidx = 0; devidx <= kibnal_data.kib_hca_idx; devidx++) { - device = ib_device_get_by_index(devidx); - - if (device == NULL) { - CERROR("Can't get IB device %d\n", devidx); - return -1; - } - - for (port = 1; port <= 2; port++) { - if (devidx == kibnal_data.kib_hca_idx && - port == kibnal_data.kib_port) - return ipoibidx; - - rc = ib_port_properties_get(device, port, - &port_props); - if (rc == 0) - ipoibidx++; - } - } - - LBUG(); - return -1; -} - -int -kibnal_startup (lnet_ni_t *ni) -{ - char ipif_name[32]; - __u32 ip; - __u32 netmask; - int up; - struct timeval tv; - int rc; - int hca; - int port; - int i; - int nob; - - LASSERT (ni->ni_lnd == &the_kiblnd); - - /* Only 1 instance supported */ - if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { - CERROR ("Only 1 instance supported\n"); - return -EPERM; - } - - if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) { - CERROR ("Can't set credits(%d) > ntx(%d)\n", - *kibnal_tunables.kib_credits, - *kibnal_tunables.kib_ntx); - return -EINVAL; - } - - memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */ - - ni->ni_maxtxcredits = *kibnal_tunables.kib_credits; - 
ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits; - - CLASSERT (LNET_MAX_INTERFACES > 1); - - - kibnal_data.kib_hca_idx = 0; /* default: first HCA */ - kibnal_data.kib_port = 0; /* any port */ - - if (ni->ni_interfaces[0] != NULL) { - /* hca.port specified in 'networks=openib(h.p)' */ - if (ni->ni_interfaces[1] != NULL) { - CERROR("Multiple interfaces not supported\n"); - return -EPERM; - } - - nob = strlen(ni->ni_interfaces[0]); - i = sscanf(ni->ni_interfaces[0], "%d.%d%n", &hca, &port, &nob); - if (i >= 2 && nob == strlen(ni->ni_interfaces[0])) { - kibnal_data.kib_hca_idx = hca; - kibnal_data.kib_port = port; - } else { - nob = strlen(ni->ni_interfaces[0]); - i = sscanf(ni->ni_interfaces[0], "%d%n", &hca, &nob); - - if (i >= 1 && nob == strlen(ni->ni_interfaces[0])) { - kibnal_data.kib_hca_idx = hca; - } else { - CERROR("Can't parse interface '%s'\n", - ni->ni_interfaces[0]); - return -EINVAL; - } - } - } - - kibnal_data.kib_ni = ni; - ni->ni_data = &kibnal_data; - - do_gettimeofday(&tv); - kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - PORTAL_MODULE_USE; - - rwlock_init(&kibnal_data.kib_global_lock); - - kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC (kibnal_data.kib_peers, - sizeof (struct list_head) * kibnal_data.kib_peer_hash_size); - if (kibnal_data.kib_peers == NULL) { - goto failed; - } - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kibnal_data.kib_peers[i]); - - spin_lock_init (&kibnal_data.kib_reaper_lock); - INIT_LIST_HEAD (&kibnal_data.kib_reaper_conns); - init_waitqueue_head (&kibnal_data.kib_reaper_waitq); - - spin_lock_init (&kibnal_data.kib_connd_lock); - INIT_LIST_HEAD (&kibnal_data.kib_connd_acceptq); - INIT_LIST_HEAD (&kibnal_data.kib_connd_peers); - init_waitqueue_head (&kibnal_data.kib_connd_waitq); - - spin_lock_init (&kibnal_data.kib_sched_lock); - INIT_LIST_HEAD (&kibnal_data.kib_sched_txq); - INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq); - 
init_waitqueue_head (&kibnal_data.kib_sched_waitq); - - spin_lock_init (&kibnal_data.kib_tx_lock); - INIT_LIST_HEAD (&kibnal_data.kib_idle_txs); - - LIBCFS_ALLOC (kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - if (kibnal_data.kib_tx_descs == NULL) { - CERROR ("Can't allocate tx descs\n"); - goto failed; - } - - /* lists/ptrs/locks initialised */ - kibnal_data.kib_init = IBNAL_INIT_DATA; - /*****************************************************/ - - for (i = 0; i < IBNAL_N_SCHED; i++) { - rc = kibnal_thread_start (kibnal_scheduler, - (void *)((unsigned long)i)); - if (rc != 0) { - CERROR("Can't spawn openibnal scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - /* must have at least 2 connds to remain responsive to svcqry while - * connecting */ - if (*kibnal_tunables.kib_n_connd < 2) - *kibnal_tunables.kib_n_connd = 2; - - - for (i = 0; i < *kibnal_tunables.kib_n_connd; i++) { - rc = kibnal_thread_start (kibnal_connd, - (void *)((unsigned long)i)); - if (rc != 0) { - CERROR("Can't spawn openibnal connd[%d]: %d\n", - i, rc); - goto failed; - } - } - - rc = kibnal_thread_start (kibnal_reaper, NULL); - if (rc != 0) { - CERROR ("Can't spawn openibnal reaper: %d\n", rc); - goto failed; - } - - kibnal_data.kib_device = ib_device_get_by_index(kibnal_data.kib_hca_idx); - if (kibnal_data.kib_device == NULL) { - CERROR ("Can't open ib device %d\n", - kibnal_data.kib_hca_idx); - goto failed; - } - - rc = ib_device_properties_get(kibnal_data.kib_device, - &kibnal_data.kib_device_props); - if (rc != 0) { - CERROR ("Can't get device props: %d\n", rc); - goto failed; - } - - CDEBUG(D_NET, "Max Initiator: %d Max Responder %d\n", - kibnal_data.kib_device_props.max_initiator_per_qp, - kibnal_data.kib_device_props.max_responder_per_qp); - - if (kibnal_data.kib_port != 0) { - rc = ib_port_properties_get(kibnal_data.kib_device, - kibnal_data.kib_port, - &kibnal_data.kib_port_props); - if (rc != 0) { - CERROR("Error %d open port %d on HCA %d\n", rc, - 
kibnal_data.kib_port, - kibnal_data.kib_hca_idx); - goto failed; - } - } else { - for (i = 1; i <= 2; i++) { - rc = ib_port_properties_get(kibnal_data.kib_device, i, - &kibnal_data.kib_port_props); - if (rc == 0) { - kibnal_data.kib_port = i; - break; - } - } - if (kibnal_data.kib_port == 0) { - CERROR ("Can't find a port\n"); - goto failed; - } - } - - i = kibnal_get_ipoibidx(); - if (i < 0) - goto failed; - - snprintf(ipif_name, sizeof(ipif_name), "%s%d", - *kibnal_tunables.kib_ipif_basename, i); - if (strlen(ipif_name) == sizeof(ipif_name - 1)) { - CERROR("IPoIB interface name %s truncated\n", ipif_name); - return -EINVAL; - } - - rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask); - if (rc != 0) { - CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc); - goto failed; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name); - goto failed; - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip); - - rc = ib_pd_create(kibnal_data.kib_device, - NULL, &kibnal_data.kib_pd); - if (rc != 0) { - CERROR ("Can't create PD: %d\n", rc); - goto failed; - } - - /* flag PD initialised */ - kibnal_data.kib_init = IBNAL_INIT_PD; - /*****************************************************/ -#if IBNAL_FMR - { - const int pool_size = *kibnal_tunables.kib_ntx; - struct ib_fmr_pool_param params = { - .max_pages_per_fmr = LNET_MAX_PAYLOAD/PAGE_SIZE, - .access = (IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ), - .pool_size = pool_size, - .dirty_watermark = (pool_size * 3)/4, - .flush_function = NULL, - .flush_arg = NULL, - .cache = 1, - }; - rc = ib_fmr_pool_create(kibnal_data.kib_pd, ¶ms, - &kibnal_data.kib_fmr_pool); - if (rc != 0) { - CERROR ("Can't create FMR pool size %d: %d\n", - pool_size, rc); - goto failed; - } - } - - /* flag FMR pool initialised */ - kibnal_data.kib_init = IBNAL_INIT_FMR; -#endif - /*****************************************************/ - - rc = kibnal_setup_tx_descs(); - if (rc != 0) { 
- CERROR ("Can't register tx descs: %d\n", rc); - goto failed; - } - - /* flag TX descs initialised */ - kibnal_data.kib_init = IBNAL_INIT_TXD; - /*****************************************************/ - - { - struct ib_cq_callback callback = { - .context = IBNAL_CALLBACK_CTXT, - .policy = IB_CQ_PROVIDER_REARM, - .function = { - .entry = kibnal_callback, - }, - .arg = NULL, - }; - int nentries = IBNAL_CQ_ENTRIES(); - - rc = ib_cq_create (kibnal_data.kib_device, - &nentries, &callback, NULL, - &kibnal_data.kib_cq); - if (rc != 0) { - CERROR ("Can't create CQ: %d\n", rc); - goto failed; - } - - /* I only want solicited events */ - rc = ib_cq_request_notification(kibnal_data.kib_cq, 1); - LASSERT (rc == 0); - } - - /* flag CQ initialised */ - kibnal_data.kib_init = IBNAL_INIT_CQ; - /*****************************************************/ - - rc = kibnal_start_ib_listener(); - if (rc != 0) - goto failed; - - /* flag everything initialised */ - kibnal_data.kib_init = IBNAL_INIT_ALL; - /*****************************************************/ - - return 0; - - failed: - kibnal_shutdown(ni); - return -ENETDOWN; -} - -void __exit -kibnal_module_fini (void) -{ - lnet_unregister_lnd(&the_kiblnd); - kibnal_tunables_fini(); -} - -int __init -kibnal_module_init (void) -{ - int rc; - - rc = kibnal_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kiblnd); - - return (0); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -#ifdef USING_TSAPI -MODULE_DESCRIPTION("Kernel Cisco IB LND v1.00"); -#else -MODULE_DESCRIPTION("Kernel OpenIB(gen1) LND v1.00"); -#endif -MODULE_LICENSE("GPL"); - -module_init(kibnal_module_init); -module_exit(kibnal_module_fini); - diff --git a/lnet/klnds/openiblnd/openiblnd.h b/lnet/klnds/openiblnd/openiblnd.h deleted file mode 100644 index ad97c9de05009d46ece0690b25e1b5e5b1c708f4..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/openiblnd.h +++ /dev/null @@ -1,687 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> - -#include <net/sock.h> -#include <linux/in.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -#include <ts_ib_core.h> -#include <ts_ib_cm.h> -#include <ts_ib_sa_client.h> - -#ifndef USING_TSAPI - -/* OpenIB Gen1 */ -typedef struct ib_qp ib_qp_t; -typedef struct ib_mr ib_mr_t; -typedef struct ib_fmr ib_fmr_t; -typedef struct ib_pd ib_pd_t; -typedef struct ib_cq ib_cq_t; -typedef struct ib_fmr_pool ib_fmr_pool_t; - -#else - -/* Cisco (topspin) */ -typedef void ib_qp_t; -typedef void ib_mr_t; -typedef void ib_fmr_t; -typedef void ib_pd_t; -typedef void ib_cq_t; -typedef void ib_fmr_pool_t; - -#define IB_ACCESS_LOCAL_WRITE TS_IB_ACCESS_LOCAL_WRITE -#define IB_WQ_SIGNAL_SELECTABLE TS_IB_ACCESS_LOCAL_WRITE -#define IB_TRANSPORT_RC TS_IB_TRANSPORT_RC -#define IB_QP_STATE_INIT TS_IB_QP_STATE_INIT -#define IB_QP_ATTRIBUTE_STATE TS_IB_QP_ATTRIBUTE_STATE -#define IB_QP_ATTRIBUTE_PORT TS_IB_QP_ATTRIBUTE_PORT -#define IB_QP_ATTRIBUTE_PKEY_INDEX TS_IB_QP_ATTRIBUTE_PKEY_INDEX -#define IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE -#define IB_ACCESS_LOCAL_WRITE TS_IB_ACCESS_LOCAL_WRITE -#define IB_ACCESS_REMOTE_WRITE TS_IB_ACCESS_REMOTE_WRITE -#define IB_ACCESS_REMOTE_READ TS_IB_ACCESS_REMOTE_READ -#define IB_CQ_CALLBACK_INTERRU TS_IB_CQ_CALLBACK_INTERRUPTPT -#define IB_CQ_PROVIDER_REARM TS_IB_CQ_PROVIDER_REARM 
-#define IB_CQ_CALLBACK_INTERRUPT TS_IB_CQ_CALLBACK_INTERRUPT -#define IB_COMPLETION_STATUS_SUCCESS TS_IB_COMPLETION_STATUS_SUCCESS -#define IB_OP_SEND TS_IB_OP_SEND -#define IB_OP_RDMA_WRITE TS_IB_OP_RDMA_WRITE -#define IB_OP_RDMA_READ TS_IB_OP_RDMA_READ - -#endif - -#ifdef CONFIG_SMP -# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define IBNAL_N_SCHED 1 /* # schedulers */ -#endif - -#define IBNAL_FMR 1 -//#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_PROCESS -#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_INTERRUPT - - -/* tunables fixed at compile time */ -#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ -#define IBNAL_CREDIT_HIGHWATER 6 /* when to eagerly return credits */ -#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ -#define IBNAL_RDMA_BASE 0x0eeb0000 - -/* QP tunables */ -#define IBNAL_RETRY 7 /* # times to retry */ -#define IBNAL_RNR_RETRY 7 /* */ -#define IBNAL_CM_RETRY 7 /* # times to retry connection */ -#define IBNAL_FLOW_CONTROL 1 -#define IBNAL_RESPONDER_RESOURCES 8 - -/************************/ -/* derived constants... 
*/ - -/* TX messages (shared by all connections) */ -#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx) -#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE) -#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE) - -/* RX messages (per connection) */ -#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE * 2) -#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE) -#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) - -/* we may have up to 2 completions per transmit + - 1 completion per receive, per connection */ -#define IBNAL_CQ_ENTRIES() ((2*IBNAL_TX_MSGS()) + \ - (IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers)) - -typedef struct -{ - char **kib_ipif_basename; /* IPoIB interface base name */ - int *kib_n_connd; /* # connection daemons */ - int *kib_min_reconnect_interval; /* min connect retry seconds... */ - int *kib_max_reconnect_interval; /* max connect retry seconds */ - int *kib_concurrent_peers; /* max # peers */ - int *kib_cksum; /* checksum kib_msg_t? */ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_keepalive; /* keepalive (seconds) */ - int *kib_ntx; /* # tx descs */ - int *kib_credits; /* # concurrent sends */ - int *kib_peercredits; /* # concurrent sends to 1 peer */ - - struct ctl_table_header *kib_sysctl; /* sysctl interface */ -} kib_tunables_t; - -typedef struct -{ - int ibp_npages; /* # pages */ - int ibp_mapped; /* mapped? */ - __u64 ibp_vaddr; /* mapped region vaddr */ - __u32 ibp_lkey; /* mapped region lkey */ - __u32 ibp_rkey; /* mapped region rkey */ - ib_mr_t *ibp_handle; /* mapped region handle */ - struct page *ibp_pages[0]; -} kib_pages_t; - -typedef struct -{ - int kib_init; /* initialisation state */ - __u64 kib_incarnation; /* which one am I */ - int kib_shutdown; /* shut down? 
*/ - atomic_t kib_nthreads; /* # live threads */ - lnet_ni_t *kib_ni; /* _the_ openib interface */ - - __u64 kib_svc_id; /* service number I listen on */ - tTS_IB_GID kib_svc_gid; /* device/port GID */ - __u16 kib_svc_pkey; /* device/port pkey */ - - void *kib_listen_handle; /* IB listen handle */ - - rwlock_t kib_global_lock; /* stabilize peer/conn ops */ - - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - int kib_nonewpeers; /* prevent new peers? */ - atomic_t kib_npeers; /* # peers extant */ - atomic_t kib_nconns; /* # connections extant */ - - struct list_head kib_reaper_conns; /* connections to reap */ - wait_queue_head_t kib_reaper_waitq; /* reaper sleeps here */ - unsigned long kib_reaper_waketime; /* when reaper will wake */ - spinlock_t kib_reaper_lock; /* serialise */ - - struct list_head kib_connd_peers; /* peers waiting for a connection */ - struct list_head kib_connd_acceptq; /* accepted sockets to handle */ - wait_queue_head_t kib_connd_waitq; /* connection daemons sleep here */ - int kib_connd_connecting; /* # connds connecting */ - spinlock_t kib_connd_lock; /* serialise */ - - wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ - struct list_head kib_sched_txq; /* tx requiring attention */ - struct list_head kib_sched_rxq; /* rx requiring attention */ - spinlock_t kib_sched_lock; /* serialise */ - - struct kib_tx *kib_tx_descs; /* all the tx descriptors */ - kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ - - struct list_head kib_idle_txs; /* idle tx descriptors */ - __u64 kib_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t kib_tx_lock; /* serialise */ - - int kib_hca_idx; /* my HCA number */ - struct ib_device *kib_device; /* "the" device */ - struct ib_device_properties kib_device_props; /* its properties */ - int kib_port; /* port on the device */ - struct ib_port_properties kib_port_props; /* its properties */ - ib_pd_t *kib_pd; /* protection 
domain */ -#if IBNAL_FMR - ib_fmr_pool_t *kib_fmr_pool; /* fast memory region pool */ -#endif - ib_cq_t *kib_cq; /* completion queue */ - -} kib_data_t; - -#define IBNAL_INIT_NOTHING 0 -#define IBNAL_INIT_DATA 1 -#define IBNAL_INIT_LIB 2 -#define IBNAL_INIT_PD 3 -#define IBNAL_INIT_FMR 4 -#define IBNAL_INIT_TXD 5 -#define IBNAL_INIT_CQ 6 -#define IBNAL_INIT_ALL 7 - -typedef struct kib_acceptsock /* accepted socket queued for connd */ -{ - struct list_head ibas_list; /* queue for attention */ - struct socket *ibas_sock; /* the accepted socket */ -} kib_acceptsock_t; - -/************************************************************************ - * IB Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). - * They may be sent via TCP/IP (service ID,GID,PKEY query/response), - * as private data in the connection request/response, or "normally". - */ - -typedef struct kib_svcrsp /* service response */ -{ - __u64 ibsr_svc_id; /* service's id */ - __u8 ibsr_svc_gid[16]; /* service's gid */ - __u16 ibsr_svc_pkey; /* service's pkey */ -} WIRE_ATTR kib_svcrsp_t; - -typedef struct kib_connparams -{ - __u32 ibcp_queue_depth; -} WIRE_ATTR kib_connparams_t; - -typedef struct -{ - union { - ib_mr_t *mr; - ib_fmr_t *fmr; - } md_handle; - __u32 md_lkey; - __u32 md_rkey; - __u64 md_addr; -} kib_md_t; - -typedef struct -{ - __u32 rd_key; /* remote key */ - __u32 rd_nob; /* # of bytes */ - __u64 rd_addr; /* remote io vaddr */ -} WIRE_ATTR kib_rdma_desc_t; - -typedef struct -{ - lnet_hdr_t ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kib_immediate_msg_t; - -typedef struct -{ - lnet_hdr_t ibrm_hdr; /* portals header */ - __u64 ibrm_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibrm_desc; /* where to suck/blow */ -} WIRE_ATTR kib_rdma_msg_t; - -typedef struct -{ - __u64 ibcm_cookie; /* opaque completion cookie */ - __u32 ibcm_status; /* completion status */ -} WIRE_ATTR kib_completion_msg_t; - 
-typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an openibnal message */ - __u16 ibm_version; /* this is my version number */ - - __u8 ibm_type; /* msg type */ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - union { - kib_svcrsp_t svcrsp; - kib_connparams_t connparams; - kib_immediate_msg_t immediate; - kib_rdma_msg_t rdma; - kib_completion_msg_t completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR kib_msg_t; - -#define IBNAL_MSG_MAGIC LNET_PROTO_OPENIB_MAGIC /* unique magic */ -#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 2 /* previous protocol version */ -#define IBNAL_MSG_VERSION 3 /* current protocol version */ - -#define IBNAL_MSG_SVCQRY 0xb0 /* service query */ -#define IBNAL_MSG_SVCRSP 0xb1 /* service response */ -#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */ -#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define IBNAL_MSG_IMMEDIATE 0xd1 /* portals hdr + payload */ -#define IBNAL_MSG_PUT_RDMA 0xd2 /* portals PUT hdr + source rdma desc */ -#define IBNAL_MSG_PUT_DONE 0xd3 /* signal PUT rdma completion */ -#define IBNAL_MSG_GET_RDMA 0xd4 /* portals GET hdr + sink rdma desc */ -#define IBNAL_MSG_GET_DONE 0xd5 /* signal GET rdma completion */ - -/***********************************************************************/ - -typedef struct kib_rx /* receive message */ -{ - struct list_head rx_list; /* queue for attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - __u64 rx_vaddr; /* pre-mapped buffer (hca vaddr) */ - kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ - struct ib_receive_param rx_sp; /* receive 
work item */ - struct ib_gather_scatter rx_gl; /* and it's memory */ -} kib_rx_t; - -typedef struct kib_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */ - struct kib_conn *tx_conn; /* owning conn */ - int tx_mapped; /* mapped for RDMA? */ - int tx_sending; /* # tx callbacks outstanding */ - int tx_status; /* completion status */ - unsigned long tx_deadline; /* completion deadline */ - int tx_passive_rdma; /* peer sucks/blows */ - int tx_passive_rdma_wait; /* waiting for peer to complete */ - __u64 tx_passive_rdma_cookie; /* completion cookie */ - lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */ - kib_md_t tx_md; /* RDMA mapping (active/passive) */ - __u64 tx_vaddr; /* pre-mapped buffer (hca vaddr) */ - kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */ - int tx_nsp; /* # send work items */ - struct ib_send_param tx_sp[2]; /* send work items... */ - struct ib_gather_scatter tx_gl[2]; /* ...and their memory */ -} kib_tx_t; - -#define KIB_TX_UNMAPPED 0 -#define KIB_TX_MAPPED 1 -#define KIB_TX_MAPPED_FMR 2 - -typedef struct kib_connreq -{ - /* active connection-in-progress state */ - struct kib_conn *cr_conn; - kib_msg_t cr_msg; - __u64 cr_tid; - tTS_IB_GID cr_gid; - kib_svcrsp_t cr_svcrsp; - struct ib_path_record cr_path; - struct ib_cm_active_param cr_connparam; -} kib_connreq_t; - -typedef struct kib_conn -{ - struct kib_peer *ibc_peer; /* owning peer */ - struct list_head ibc_list; /* stash on peer's conn list */ - __u64 ibc_incarnation; /* which instance of the peer */ - int ibc_version; /* peer protocol version */ - atomic_t ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits; /* # credits for ACK/DONE msgs */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head 
ibc_tx_queue_nocred; /* sends that don't need a credit */ - struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */ - struct list_head ibc_tx_queue; /* send queue */ - struct list_head ibc_active_txs; /* active tx awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - kib_rx_t *ibc_rxs; /* the rx descs */ - kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ - ib_qp_t *ibc_qp; /* queue pair */ - __u32 ibc_qpn; /* queue pair number */ - tTS_IB_CM_COMM_ID ibc_comm_id; /* connection ID? */ - kib_connreq_t *ibc_connreq; /* connection request state */ -} kib_conn_t; - -#define IBNAL_CONN_INIT_NOTHING 0 /* initial state */ -#define IBNAL_CONN_INIT_QP 1 /* ibc_qp set up */ -#define IBNAL_CONN_CONNECTING 2 /* started to connect */ -#define IBNAL_CONN_ESTABLISHED 3 /* connection established */ -#define IBNAL_CONN_DEATHROW 4 /* waiting to be closed */ -#define IBNAL_CONN_ZOMBIE 5 /* waiting to be freed */ - -typedef struct kib_peer -{ - struct list_head ibp_list; /* stash on global peer list */ - struct list_head ibp_connd_list; /* schedule on kib_connd_peers */ - lnet_nid_t ibp_nid; /* who's on the other end(s) */ - __u32 ibp_ip; /* IP to query for peer conn params */ - int ibp_port; /* port to qery for peer conn params */ - __u64 ibp_incarnation; /* peer's incarnation */ - atomic_t ibp_refcount; /* # users */ - int ibp_persistence; /* "known" peer refs */ - struct list_head ibp_conns; /* all active connections */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - int ibp_connecting; /* current active connection attempts */ - int ibp_accepting; /* current passive connection attempts */ - unsigned long ibp_reconnect_time; /* when reconnect may be attempted */ - unsigned long ibp_reconnect_interval; /* exponential backoff */ - int ibp_error; /* errno on closing this peer */ - cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */ -} kib_peer_t; - -extern kib_data_t kibnal_data; -extern kib_tunables_t kibnal_tunables; 
- -/******************************************************************************/ - -/* these are purposely avoiding using local vars so they don't increase - * stack consumption. */ - -#define kibnal_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define kibnal_conn_decref(conn) \ -do { \ - unsigned long flags; \ - \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - spin_lock_irqsave(&kibnal_data.kib_reaper_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kibnal_data.kib_reaper_conns); \ - wake_up(&kibnal_data.kib_reaper_waitq); \ - spin_unlock_irqrestore(&kibnal_data.kib_reaper_lock, flags); \ - } \ -} while (0) - -#define kibnal_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kibnal_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kibnal_destroy_peer(peer); \ -} while (0) - -/******************************************************************************/ - -static inline struct list_head * -kibnal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - - return (&kibnal_data.kib_peers [hash]); -} - -static inline int -kibnal_peer_active(kib_peer_t *peer) -{ - /* Am I in the peer hash table? 
*/ - return (!list_empty(&peer->ibp_list)); -} - -static inline void -kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) -{ - struct list_head *q; - - LASSERT (tx->tx_nsp > 0); /* work items set up */ - LASSERT (tx->tx_conn == NULL); /* only set here */ - - kibnal_conn_addref(conn); - tx->tx_conn = conn; - tx->tx_deadline = jiffies + *kibnal_tunables.kib_timeout * HZ; - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* All messages have simple credit control */ - q = &conn->ibc_tx_queue; - } else { - LASSERT (conn->ibc_version == IBNAL_MSG_VERSION); - - switch (tx->tx_msg->ibm_type) { - case IBNAL_MSG_PUT_RDMA: - case IBNAL_MSG_GET_RDMA: - /* RDMA request: reserve a buffer for the RDMA reply - * before sending */ - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - /* RDMA completion: no credits; peer has reserved a - * reply buffer */ - q = &conn->ibc_tx_queue_nocred; - break; - - case IBNAL_MSG_NOOP: - case IBNAL_MSG_IMMEDIATE: - /* Otherwise: consume a credit before sending */ - q = &conn->ibc_tx_queue; - break; - - default: - LBUG(); - q = NULL; - } - } - - list_add_tail(&tx->tx_list, q); -} - -static inline int -kibnal_send_keepalive(kib_conn_t *conn) -{ - return (*kibnal_tunables.kib_keepalive > 0) && - time_after(jiffies, conn->ibc_last_send + - *kibnal_tunables.kib_keepalive*HZ); -} - -/* CAVEAT EMPTOR: - * We rely on tx/rx descriptor alignment to allow us to use the lowest bit - * of the work request id as a flag to determine if the completion is for a - * transmit or a receive. It seems that that the CQ entry's 'op' field - * isn't always set correctly on completions that occur after QP teardown. */ - -static inline __u64 -kibnal_ptr2wreqid (void *ptr, int isrx) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT ((lptr & 1) == 0); - return (__u64)(lptr | (isrx ? 
1 : 0)); -} - -static inline void * -kibnal_wreqid2ptr (__u64 wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~1UL); -} - -static inline int -kibnal_wreqid_is_rx (__u64 wreqid) -{ - return (wreqid & 1) != 0; -} - -#if (IB_NTXRXPARAMS == 3) -static inline int -kibnal_ib_send(ib_qp_t *qp, struct ib_send_param *p) -{ - return ib_send(qp, p, 1); -} - -static inline int -kibnal_ib_receive(ib_qp_t *qp, struct ib_receive_param *p) -{ - return ib_receive(qp, p, 1); -} -#elif (IB_NTXRXPARAMS == 4) -static inline int -kibnal_ib_send(ib_qp_t *qp, struct ib_send_param *p) -{ - return ib_send(qp, p, 1, NULL); -} - -static inline int -kibnal_ib_receive(ib_qp_t *qp, struct ib_receive_param *p) -{ - return ib_receive(qp, p, 1, NULL); -} -#else - #error "IB_NTXRXPARAMS not set correctly" -#endif - -int kibnal_startup (lnet_ni_t *ni); -void kibnal_shutdown (lnet_ni_t *ni); -int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kibnal_eager_recv (lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kibnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int kibnal_accept(lnet_ni_t *ni, struct socket *sock); - -extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob); -extern void kibnal_pack_msg(kib_msg_t *msg, int version, int credits, - lnet_nid_t dstnid, __u64 dststamp); -extern int kibnal_unpack_msg(kib_msg_t *msg, int expected_version, int nob); -extern void kibnal_handle_svcqry (struct socket *sock); -extern int kibnal_make_svcqry (kib_conn_t *conn); -extern void kibnal_free_acceptsock (kib_acceptsock_t *as); -extern int kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid); -extern void kibnal_destroy_peer (kib_peer_t *peer); -extern int kibnal_add_persistent_peer(lnet_nid_t nid, __u32 ip, int port); -extern int 
kibnal_del_peer (lnet_nid_t nid); -extern kib_peer_t *kibnal_find_peer_locked (lnet_nid_t nid); -extern void kibnal_unlink_peer_locked (kib_peer_t *peer); -extern void kibnal_peer_alive(kib_peer_t *peer); -extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, - __u64 incarnation); -extern kib_conn_t *kibnal_create_conn (void); -extern void kibnal_destroy_conn (kib_conn_t *conn); -extern int kibnal_alloc_pages (kib_pages_t **pp, int npages, int access); -extern void kibnal_free_pages (kib_pages_t *p); - -extern void kibnal_check_sends (kib_conn_t *conn); - -extern tTS_IB_CM_CALLBACK_RETURN -kibnal_bad_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, - void *param, void *arg); -extern tTS_IB_CM_CALLBACK_RETURN -kibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, - void *param, void *arg); -extern tTS_IB_CM_CALLBACK_RETURN -kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, - void *param, void *arg); - -extern void kibnal_close_conn_locked (kib_conn_t *conn, int error); -extern void kibnal_destroy_conn (kib_conn_t *conn); -extern int kibnal_thread_start (int (*fn)(void *arg), void *arg); -extern int kibnal_scheduler(void *arg); -extern int kibnal_connd (void *arg); -extern int kibnal_reaper (void *arg); -extern void kibnal_callback (ib_cq_t *cq, struct ib_cq_entry *e, void *arg); -extern void kibnal_txlist_done (struct list_head *txlist, int status); -extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); -extern int kibnal_close_conn (kib_conn_t *conn, int why); -extern void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lnet_msg_t *lntmsg, - unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - int offset, int nob); - -extern int kibnal_tunables_init(void); -extern void kibnal_tunables_fini(void); diff --git a/lnet/klnds/openiblnd/openiblnd_cb.c b/lnet/klnds/openiblnd/openiblnd_cb.c deleted file mode 100644 index 
75f3e2362f2d23de2395852b65a984be32960746..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/openiblnd_cb.c +++ /dev/null @@ -1,2601 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "openiblnd.h" - -/* - * LIB functions follow - * - */ -void -kibnal_schedule_tx_done (kib_tx_t *tx) -{ - unsigned long flags; - - spin_lock_irqsave (&kibnal_data.kib_sched_lock, flags); - - list_add_tail(&tx->tx_list, &kibnal_data.kib_sched_txq); - wake_up (&kibnal_data.kib_sched_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); -} - -void -kibnal_tx_done (kib_tx_t *tx) -{ - lnet_msg_t *lntmsg[2]; - unsigned long flags; - int i; - int rc; - - LASSERT (tx->tx_sending == 0); /* mustn't be awaiting callback */ - LASSERT (!tx->tx_passive_rdma_wait); /* mustn't be awaiting RDMA */ - - if (in_interrupt()) { - /* can't deregister memory/flush FMAs/finalize in IRQ context... 
*/ - kibnal_schedule_tx_done(tx); - return; - } - - switch (tx->tx_mapped) { - default: - LBUG(); - - case KIB_TX_UNMAPPED: - break; - - case KIB_TX_MAPPED: - rc = ib_memory_deregister(tx->tx_md.md_handle.mr); - LASSERT (rc == 0); - tx->tx_mapped = KIB_TX_UNMAPPED; - break; - -#if IBNAL_FMR - case KIB_TX_MAPPED_FMR: - rc = ib_fmr_deregister(tx->tx_md.md_handle.fmr); - LASSERT (rc == 0); - -#ifndef USING_TSAPI - /* Somewhat belt-and-braces since the tx's conn has closed if - * this was a passive RDMA waiting to complete... */ - if (tx->tx_status != 0) - ib_fmr_pool_force_flush(kibnal_data.kib_fmr_pool); -#endif - tx->tx_mapped = KIB_TX_UNMAPPED; - break; -#endif - } - - /* tx may have up to 2 ptlmsgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - rc = tx->tx_status; - - if (tx->tx_conn != NULL) { - kibnal_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nsp = 0; - tx->tx_passive_rdma = 0; - tx->tx_status = 0; - - spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags); - - list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_txs); - - spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (lntmsg[i] == NULL) - continue; - - lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc); - } -} - -kib_tx_t * -kibnal_get_idle_tx (void) -{ - unsigned long flags; - kib_tx_t *tx; - - spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags); - - if (list_empty (&kibnal_data.kib_idle_txs)) { - spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); - return NULL; - } - - tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - /* Allocate a new passive RDMA completion cookie. It might not be - * needed, but we've got a lock right now and we're unlikely to - * wrap... 
*/ - tx->tx_passive_rdma_cookie = kibnal_data.kib_next_tx_cookie++; - - spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags); - - LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); - LASSERT (tx->tx_nsp == 0); - LASSERT (tx->tx_sending == 0); - LASSERT (tx->tx_status == 0); - LASSERT (tx->tx_conn == NULL); - LASSERT (!tx->tx_passive_rdma); - LASSERT (!tx->tx_passive_rdma_wait); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); - - return tx; -} - -void -kibnal_complete_passive_rdma(kib_conn_t *conn, __u64 cookie, int status) -{ - struct list_head *ttmp; - unsigned long flags; - int idle; - - spin_lock_irqsave (&conn->ibc_lock, flags); - - list_for_each (ttmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(ttmp, kib_tx_t, tx_list); - - LASSERT (tx->tx_passive_rdma || - !tx->tx_passive_rdma_wait); - - LASSERT (tx->tx_passive_rdma_wait || - tx->tx_sending != 0); - - if (!tx->tx_passive_rdma_wait || - tx->tx_passive_rdma_cookie != cookie) - continue; - - CDEBUG(D_NET, "Complete %p "LPD64": %d\n", tx, cookie, status); - - /* XXX Set mlength of reply here */ - - tx->tx_status = status; - tx->tx_passive_rdma_wait = 0; - idle = (tx->tx_sending == 0); - - if (idle) - list_del (&tx->tx_list); - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - /* I could be racing with tx callbacks. It's whoever - * _makes_ tx idle that frees it */ - if (idle) - kibnal_tx_done (tx); - return; - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - CERROR ("Unmatched (late?) 
RDMA completion "LPX64" from %s\n", - cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); -} - -void -kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit) -{ - kib_conn_t *conn = rx->rx_conn; - int rc; - unsigned long flags; - - LASSERT(!rsrvd_credit || - conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - - rx->rx_gl = (struct ib_gather_scatter) { - .address = rx->rx_vaddr, - .length = IBNAL_MSG_SIZE, - .key = conn->ibc_rx_pages->ibp_lkey, - }; - - rx->rx_sp = (struct ib_receive_param) { - .work_request_id = kibnal_ptr2wreqid(rx, 1), - .scatter_list = &rx->rx_gl, - .num_scatter_entries = 1, - .device_specific = NULL, - .signaled = 1, - }; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - LASSERT (rx->rx_nob >= 0); /* not posted */ - rx->rx_nob = -1; /* is now */ - mb(); - - if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) - rc = -ECONNABORTED; - else - rc = kibnal_ib_receive(conn->ibc_qp, &rx->rx_sp); - - if (rc == 0) { - if (credit || rsrvd_credit) { - spin_lock_irqsave(&conn->ibc_lock, flags); - - if (credit) - conn->ibc_outstanding_credits++; - if (rsrvd_credit) - conn->ibc_reserved_credits++; - - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - kibnal_check_sends(conn); - } - return; - } - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - CERROR ("Error posting receive -> %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kibnal_close_conn (rx->rx_conn, rc); - } else { - CDEBUG (D_NET, "Error posting receive -> %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - } - - /* Drop rx's ref */ - kibnal_conn_decref(conn); -} - -void -kibnal_rx_callback (struct ib_cq_entry *e) -{ - kib_rx_t *rx = (kib_rx_t *)kibnal_wreqid2ptr(e->work_request_id); - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - int credits; - unsigned long flags; - int rc; - int err = -ECONNABORTED; - - CDEBUG (D_NET, "rx %p conn %p\n", rx, conn); - LASSERT (rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - mb(); - - 
/* receives complete with error in any case after we've started - * closing the QP */ - if (conn->ibc_state >= IBNAL_CONN_DEATHROW) - goto failed; - - /* We don't post receives until the conn is established */ - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - - if (e->status != IB_COMPLETION_STATUS_SUCCESS) { - CERROR("Rx from %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), e->status); - goto failed; - } - - LASSERT (e->bytes_transferred >= 0); - rx->rx_nob = e->bytes_transferred; - mb(); - - rc = kibnal_unpack_msg(msg, conn->ibc_version, rx->rx_nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid, - msg->ibm_srcnid) || - !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR ("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - err = -ESTALE; - goto failed; - } - - /* Have I received credits that will let me send? 
*/ - credits = msg->ibm_credits; - if (credits != 0) { - spin_lock_irqsave(&conn->ibc_lock, flags); - conn->ibc_credits += credits; - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - kibnal_check_sends(conn); - } - - switch (msg->ibm_type) { - case IBNAL_MSG_NOOP: - kibnal_post_rx (rx, 1, 0); - return; - - case IBNAL_MSG_IMMEDIATE: - break; - - case IBNAL_MSG_PUT_RDMA: - case IBNAL_MSG_GET_RDMA: - CDEBUG(D_NET, "%d RDMA: cookie "LPX64", key %x, addr "LPX64", nob %d\n", - msg->ibm_type, msg->ibm_u.rdma.ibrm_cookie, - msg->ibm_u.rdma.ibrm_desc.rd_key, - msg->ibm_u.rdma.ibrm_desc.rd_addr, - msg->ibm_u.rdma.ibrm_desc.rd_nob); - break; - - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - CDEBUG(D_NET, "%d DONE: cookie "LPX64", status %d\n", - msg->ibm_type, msg->ibm_u.completion.ibcm_cookie, - msg->ibm_u.completion.ibcm_status); - - kibnal_complete_passive_rdma (conn, - msg->ibm_u.completion.ibcm_cookie, - msg->ibm_u.completion.ibcm_status); - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - kibnal_post_rx (rx, 1, 0); - } else { - /* this reply buffer was pre-reserved */ - kibnal_post_rx (rx, 0, 1); - } - return; - - default: - CERROR ("Bad msg type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - kibnal_peer_alive(conn->ibc_peer); - - /* schedule for kibnal_rx() in thread context */ - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - - list_add_tail (&rx->rx_list, &kibnal_data.kib_sched_rxq); - wake_up (&kibnal_data.kib_sched_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - return; - - failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - kibnal_close_conn(conn, err); - - /* Don't re-post rx & drop its ref on conn */ - kibnal_conn_decref(conn); -} - -void -kibnal_rx (kib_rx_t *rx) -{ - int rc = 0; - kib_msg_t *msg = rx->rx_msg; - - switch (msg->ibm_type) { - case IBNAL_MSG_GET_RDMA: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr, - 
msg->ibm_srcnid, rx, 1); - break; - - case IBNAL_MSG_PUT_RDMA: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.rdma.ibrm_hdr, - msg->ibm_srcnid, rx, 1); - break; - - case IBNAL_MSG_IMMEDIATE: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - break; - - default: - LBUG(); - break; - } - - if (rc < 0) { - kibnal_close_conn(rx->rx_conn, rc); - kibnal_post_rx (rx, 1, 0); - } -} - -#if 0 -int -kibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) - page = vmalloc_to_page ((void *)vaddr); -#if CONFIG_HIGHMEM - else if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) - page = vmalloc_to_page ((void *)vaddr); - /* in 2.4 ^ just walks the page tables */ -#endif - else - page = virt_to_page (vaddr); - - if (page == NULL || - !VALID_PAGE (page)) - return (-EFAULT); - - *physp = lnet_page2phys(page) + (vaddr & (PAGE_SIZE - 1)); - return (0); -} -#endif - -int -kibnal_map_iov (kib_tx_t *tx, int access, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - void *vaddr; - int rc; - - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR ("Can't map multiple vaddr fragments\n"); - return (-EMSGSIZE); - } - - vaddr = (void *)(((unsigned long)iov->iov_base) + offset); - tx->tx_md.md_addr = (__u64)((unsigned long)vaddr); - - rc = ib_memory_register (kibnal_data.kib_pd, - vaddr, nob, - access, - &tx->tx_md.md_handle.mr, - &tx->tx_md.md_lkey, - &tx->tx_md.md_rkey); - - if (rc != 0) { - CERROR ("Can't map vaddr: %d\n", rc); - return (rc); - } - - tx->tx_mapped = KIB_TX_MAPPED; - return (0); -} - -int -kibnal_map_kiov (kib_tx_t *tx, int access, - int nkiov, lnet_kiov_t *kiov, - int offset, int nob) -{ -#if IBNAL_FMR - __u64 *phys; 
- const int mapped = KIB_TX_MAPPED_FMR; -#else - struct ib_physical_buffer *phys; - const int mapped = KIB_TX_MAPPED; -#endif - int page_offset; - int nphys; - int resid; - int phys_size; - int rc; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - phys_size = nkiov * sizeof (*phys); - LIBCFS_ALLOC(phys, phys_size); - if (phys == NULL) { - CERROR ("Can't allocate tmp phys\n"); - return (-ENOMEM); - } - - page_offset = kiov->kiov_offset + offset; -#if IBNAL_FMR - phys[0] = lnet_page2phys(kiov->kiov_page); -#else - phys[0].address = lnet_page2phys(kiov->kiov_page); - phys[0].size = PAGE_SIZE; -#endif - nphys = 1; - resid = nob - (kiov->kiov_len - offset); - - while (resid > 0) { - kiov++; - nkiov--; - LASSERT (nkiov > 0); - - if (kiov->kiov_offset != 0 || - ((resid > PAGE_SIZE) && - kiov->kiov_len < PAGE_SIZE)) { - int i; - /* Can't have gaps */ - CERROR ("Can't make payload contiguous in I/O VM:" - "page %d, offset %d, len %d \n", nphys, - kiov->kiov_offset, kiov->kiov_len); - - for (i = -nphys; i < nkiov; i++) - { - CERROR("kiov[%d] %p +%d for %d\n", - i, kiov[i].kiov_page, kiov[i].kiov_offset, kiov[i].kiov_len); - } - - rc = -EINVAL; - goto out; - } - - if (nphys == LNET_MAX_IOV) { - CERROR ("payload too big (%d)\n", nphys); - rc = -EMSGSIZE; - goto out; - } - - LASSERT (nphys * sizeof (*phys) < phys_size); -#if IBNAL_FMR - phys[nphys] = lnet_page2phys(kiov->kiov_page); -#else - phys[nphys].address = lnet_page2phys(kiov->kiov_page); - phys[nphys].size = PAGE_SIZE; -#endif - nphys++; - - resid -= PAGE_SIZE; - } - - tx->tx_md.md_addr = IBNAL_RDMA_BASE; - -#if IBNAL_FMR - rc = ib_fmr_register_physical (kibnal_data.kib_fmr_pool, - phys, nphys, - &tx->tx_md.md_addr, - page_offset, - &tx->tx_md.md_handle.fmr, - &tx->tx_md.md_lkey, - 
&tx->tx_md.md_rkey); -#else - rc = ib_memory_register_physical (kibnal_data.kib_pd, - phys, nphys, - &tx->tx_md.md_addr, - nob, page_offset, - access, - &tx->tx_md.md_handle.mr, - &tx->tx_md.md_lkey, - &tx->tx_md.md_rkey); -#endif - if (rc == 0) { - CDEBUG(D_NET, "Mapped %d pages %d bytes @ offset %d: lkey %x, rkey %x\n", - nphys, nob, page_offset, tx->tx_md.md_lkey, tx->tx_md.md_rkey); - tx->tx_mapped = mapped; - } else { - CERROR ("Can't map phys: %d\n", rc); - rc = -EFAULT; - } - - out: - LIBCFS_FREE(phys, phys_size); - return (rc); -} - -kib_conn_t * -kibnal_find_conn_locked (kib_peer_t *peer) -{ - struct list_head *tmp; - - /* just return the first connection */ - list_for_each (tmp, &peer->ibp_conns) { - return (list_entry(tmp, kib_conn_t, ibc_list)); - } - - return (NULL); -} - -void -kibnal_check_sends (kib_conn_t *conn) -{ - unsigned long flags; - kib_tx_t *tx; - int rc; - int i; - int consume_credit; - int done; - int nwork; - - spin_lock_irqsave (&conn->ibc_lock, flags); - - LASSERT (conn->ibc_nsends_posted <= IBNAL_RX_MSGS); - LASSERT (conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_nocred) && - (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER || - kibnal_send_keepalive(conn))) { - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - tx = kibnal_get_idle_tx(); - if (tx != NULL) - kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0); - - spin_lock_irqsave(&conn->ibc_lock, flags); - - if (tx != NULL) - kibnal_queue_tx_locked(tx, conn); - } - - for (;;) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - LASSERT (conn->ibc_version != - 
IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry(conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); - consume_credit = 0; - } else if (!list_empty (&conn->ibc_tx_queue)) { - tx = list_entry (conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - consume_credit = 1; - } else { - /* nothing waiting */ - break; - } - - /* We rely on this for QP sizing */ - LASSERT (tx->tx_nsp > 0 && tx->tx_nsp <= 2); - - LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE); - LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE); - - /* Not on ibc_rdma_queue */ - LASSERT (!tx->tx_passive_rdma_wait); - - if (conn->ibc_nsends_posted == IBNAL_RX_MSGS) - break; - - if (consume_credit) { - if (conn->ibc_credits == 0) /* no credits */ - break; - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) /* giving back credits */ - break; - } - - list_del (&tx->tx_list); - - if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP && - (!list_empty(&conn->ibc_tx_queue) || - !list_empty(&conn->ibc_tx_queue_nocred) || - (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER && - !kibnal_send_keepalive(conn)))) { - /* redundant NOOP */ - spin_unlock_irqrestore(&conn->ibc_lock, flags); - kibnal_tx_done(tx); - spin_lock_irqsave(&conn->ibc_lock, flags); - continue; - } - - kibnal_pack_msg(tx->tx_msg, conn->ibc_version, - conn->ibc_outstanding_credits, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation); - - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (consume_credit) - conn->ibc_credits--; - - tx->tx_sending = tx->tx_nsp; - tx->tx_passive_rdma_wait = tx->tx_passive_rdma; - list_add (&tx->tx_list, &conn->ibc_active_txs); - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - /* NB the gap between removing tx from the queue and sending it - * allows message re-ordering to occur */ - - LASSERT (tx->tx_nsp > 0); - - rc = -ECONNABORTED; - nwork = 0; - if 
(conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - tx->tx_status = 0; - /* Driver only accepts 1 item at a time */ - for (i = 0; i < tx->tx_nsp; i++) { - rc = kibnal_ib_send(conn->ibc_qp, &tx->tx_sp[i]); - if (rc != 0) - break; - nwork++; - } - } - - conn->ibc_last_send = jiffies; - - spin_lock_irqsave (&conn->ibc_lock, flags); - if (rc != 0) { - /* NB credits are transferred in the actual - * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; - if (consume_credit) - conn->ibc_credits++; - conn->ibc_nsends_posted--; - - tx->tx_status = rc; - tx->tx_passive_rdma_wait = 0; - tx->tx_sending -= tx->tx_nsp - nwork; - - done = (tx->tx_sending == 0); - if (done) - list_del (&tx->tx_list); - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) - CERROR ("Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - else - CDEBUG (D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_close_conn (conn, rc); - - if (done) - kibnal_tx_done (tx); - return; - } - - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); -} - -void -kibnal_tx_callback (struct ib_cq_entry *e) -{ - kib_tx_t *tx = (kib_tx_t *)kibnal_wreqid2ptr(e->work_request_id); - kib_conn_t *conn; - unsigned long flags; - int idle; - - conn = tx->tx_conn; - LASSERT (conn != NULL); - LASSERT (tx->tx_sending != 0); - - spin_lock_irqsave(&conn->ibc_lock, flags); - - CDEBUG(D_NET, "conn %p tx %p [%d/%d]: %d\n", conn, tx, - tx->tx_nsp - tx->tx_sending, tx->tx_nsp, - e->status); - - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. If it's - * not me, then I take an extra ref on conn so it can't disappear - * under me. 
*/ - - tx->tx_sending--; - idle = (tx->tx_sending == 0) && /* This is the final callback */ - (!tx->tx_passive_rdma_wait); /* Not waiting for RDMA completion */ - if (idle) - list_del(&tx->tx_list); - - kibnal_conn_addref(conn); - - if (tx->tx_sending == 0) - conn->ibc_nsends_posted--; - - if (e->status != IB_COMPLETION_STATUS_SUCCESS && - tx->tx_status == 0) - tx->tx_status = -ECONNABORTED; - - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - if (idle) - kibnal_tx_done (tx); - - if (e->status != IB_COMPLETION_STATUS_SUCCESS) { - CDEBUG (D_NETERROR, "Tx completion to %s failed: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), e->status); - kibnal_close_conn (conn, -ENETDOWN); - } else { - kibnal_peer_alive(conn->ibc_peer); - /* can I shovel some more sends out the door? */ - kibnal_check_sends(conn); - } - - kibnal_conn_decref(conn); -} - -void -kibnal_callback (ib_cq_t *cq, struct ib_cq_entry *e, void *arg) -{ - if (kibnal_wreqid_is_rx(e->work_request_id)) - kibnal_rx_callback (e); - else - kibnal_tx_callback (e); -} - -void -kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) -{ - struct ib_gather_scatter *gl = &tx->tx_gl[tx->tx_nsp]; - struct ib_send_param *sp = &tx->tx_sp[tx->tx_nsp]; - int fence; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - - LASSERT (tx->tx_nsp >= 0 && - tx->tx_nsp < sizeof(tx->tx_sp)/sizeof(tx->tx_sp[0])); - LASSERT (nob <= IBNAL_MSG_SIZE); - - kibnal_init_msg(tx->tx_msg, type, body_nob); - - /* Fence the message if it's bundled with an RDMA read */ - fence = (tx->tx_nsp > 0) && - (type == IBNAL_MSG_PUT_DONE); - - *gl = (struct ib_gather_scatter) { - .address = tx->tx_vaddr, - .length = nob, - .key = kibnal_data.kib_tx_pages->ibp_lkey, - }; - - /* NB If this is an RDMA read, the completion message must wait for - * the RDMA to complete. Sends wait for previous RDMA writes - * anyway... 
*/ - *sp = (struct ib_send_param) { - .work_request_id = kibnal_ptr2wreqid(tx, 0), - .op = IB_OP_SEND, - .gather_list = gl, - .num_gather_entries = 1, - .device_specific = NULL, - .solicited_event = 1, - .signaled = 1, - .immediate_data_valid = 0, - .fence = fence, - .inline_data = 0, - }; - - tx->tx_nsp++; -} - -void -kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn) -{ - unsigned long flags; - - spin_lock_irqsave(&conn->ibc_lock, flags); - - kibnal_queue_tx_locked (tx, conn); - - spin_unlock_irqrestore(&conn->ibc_lock, flags); - - kibnal_check_sends(conn); -} - -void -kibnal_schedule_active_connect_locked (kib_peer_t *peer) -{ - /* Called with exclusive kib_global_lock */ - - peer->ibp_connecting++; - kibnal_peer_addref(peer); /* extra ref for connd */ - - spin_lock (&kibnal_data.kib_connd_lock); - - LASSERT (list_empty(&peer->ibp_connd_list)); - list_add_tail (&peer->ibp_connd_list, - &kibnal_data.kib_connd_peers); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock (&kibnal_data.kib_connd_lock); -} - -void -kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid) -{ - unsigned long flags; - kib_peer_t *peer; - kib_conn_t *conn; - int retry; - int rc; - rwlock_t *g_lock = &kibnal_data.kib_global_lock; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx->tx_nsp > 0); /* work items have been set up */ - - for (retry = 0; ; retry = 1) { - read_lock_irqsave(g_lock, flags); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) { - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - kibnal_conn_addref(conn); /* 1 ref for me...*/ - read_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...until here */ - return; - } - } - - /* Making one or more connections; I'll need a write lock... 
*/ - read_unlock(g_lock); - write_lock(g_lock); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) - break; - - write_unlock_irqrestore (g_lock, flags); - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); - tx->tx_status = -EHOSTUNREACH; - kibnal_tx_done (tx); - return; - } - - rc = kibnal_add_persistent_peer(nid, LNET_NIDADDR(nid), - lnet_acceptor_port()); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); - tx->tx_status = rc; - kibnal_tx_done(tx); - return; - } - } - - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - kibnal_conn_addref(conn); /* +1 ref from me... */ - write_unlock_irqrestore (g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...until here */ - return; - } - - if (peer->ibp_connecting == 0 && - peer->ibp_accepting == 0) { - if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */ - time_after_eq(jiffies, peer->ibp_reconnect_time))) { - write_unlock_irqrestore (g_lock, flags); - tx->tx_status = -EHOSTUNREACH; - kibnal_tx_done (tx); - return; - } - - kibnal_schedule_active_connect_locked(peer); - } - - /* A connection is being established; queue the message... 
*/ - list_add_tail (&tx->tx_list, &peer->ibp_tx_queue); - - write_unlock_irqrestore (g_lock, flags); -} - -void -kibnal_txlist_done (struct list_head *txlist, int status) -{ - kib_tx_t *tx; - - while (!list_empty(txlist)) { - tx = list_entry (txlist->next, kib_tx_t, tx_list); - - list_del (&tx->tx_list); - /* complete now */ - tx->tx_status = status; - kibnal_tx_done (tx); - } -} - -int -kibnal_start_passive_rdma (int type, lnet_msg_t *lntmsg, - int niov, struct iovec *iov, lnet_kiov_t *kiov, - int nob) -{ - lnet_nid_t nid = lntmsg->msg_target.nid; - kib_tx_t *tx; - kib_msg_t *ibmsg; - int rc; - int access; - - LASSERT (type == IBNAL_MSG_PUT_RDMA || - type == IBNAL_MSG_GET_RDMA); - LASSERT (nob > 0); - LASSERT (!in_interrupt()); /* Mapping could block */ - - if (type == IBNAL_MSG_PUT_RDMA) { - access = IB_ACCESS_REMOTE_READ; - } else { - access = IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_LOCAL_WRITE; - } - - tx = kibnal_get_idle_tx (); - if (tx == NULL) { - CERROR("Can't allocate %s txd for %s\n", - (type == IBNAL_MSG_PUT_RDMA) ? 
"PUT/REPLY" : "GET", - libcfs_nid2str(nid)); - return -ENOMEM; - } - - - if (iov != NULL) - rc = kibnal_map_iov (tx, access, niov, iov, 0, nob); - else - rc = kibnal_map_kiov (tx, access, niov, kiov, 0, nob); - - if (rc != 0) { - CERROR ("Can't map RDMA for %s: %d\n", - libcfs_nid2str(nid), rc); - goto failed; - } - - if (type == IBNAL_MSG_GET_RDMA) { - /* reply gets finalized when tx completes */ - tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni, - lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR ("Can't create reply for GET -> %s\n", - libcfs_nid2str(nid)); - rc = -ENOMEM; - goto failed; - } - } - - tx->tx_passive_rdma = 1; - - ibmsg = tx->tx_msg; - - ibmsg->ibm_u.rdma.ibrm_hdr = lntmsg->msg_hdr; - ibmsg->ibm_u.rdma.ibrm_cookie = tx->tx_passive_rdma_cookie; - ibmsg->ibm_u.rdma.ibrm_desc.rd_key = tx->tx_md.md_rkey; - ibmsg->ibm_u.rdma.ibrm_desc.rd_addr = tx->tx_md.md_addr; - ibmsg->ibm_u.rdma.ibrm_desc.rd_nob = nob; - - kibnal_init_tx_msg (tx, type, sizeof (kib_rdma_msg_t)); - - CDEBUG(D_NET, "Passive: %p cookie "LPX64", key %x, addr " - LPX64", nob %d\n", - tx, tx->tx_passive_rdma_cookie, tx->tx_md.md_rkey, - tx->tx_md.md_addr, nob); - - /* lntmsg gets finalized when tx completes. 
*/ - tx->tx_lntmsg[0] = lntmsg; - - kibnal_launch_tx(tx, nid); - return (0); - - failed: - tx->tx_status = rc; - kibnal_tx_done (tx); - return (-EIO); -} - -void -kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lnet_msg_t *lntmsg, - unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - int offset, int nob) -{ - kib_msg_t *rxmsg = rx->rx_msg; - kib_msg_t *txmsg; - kib_tx_t *tx; - int access; - int rdma_op; - int rc; - - CDEBUG(D_NET, "type %d, status %d, niov %d, offset %d, nob %d\n", - type, status, niov, offset, nob); - - /* Called by scheduler */ - LASSERT (!in_interrupt ()); - - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - /* No data if we're completing with failure */ - LASSERT (status == 0 || nob == 0); - - LASSERT (type == IBNAL_MSG_GET_DONE || - type == IBNAL_MSG_PUT_DONE); - - if (type == IBNAL_MSG_GET_DONE) { - access = 0; - rdma_op = IB_OP_RDMA_WRITE; - LASSERT (rxmsg->ibm_type == IBNAL_MSG_GET_RDMA); - } else { - access = IB_ACCESS_LOCAL_WRITE; - rdma_op = IB_OP_RDMA_READ; - LASSERT (rxmsg->ibm_type == IBNAL_MSG_PUT_RDMA); - } - - tx = kibnal_get_idle_tx (); - if (tx == NULL) { - CERROR ("tx descs exhausted on RDMA from %s" - " completing locally with failure\n", - libcfs_nid2str(rx->rx_conn->ibc_peer->ibp_nid)); - lnet_finalize (kibnal_data.kib_ni, lntmsg, -ENOMEM); - return; - } - LASSERT (tx->tx_nsp == 0); - - if (nob != 0) { - /* We actually need to transfer some data (the transfer - * size could get truncated to zero when the incoming - * message is matched) */ - - if (kiov != NULL) - rc = kibnal_map_kiov (tx, access, - niov, kiov, offset, nob); - else - rc = kibnal_map_iov (tx, access, - niov, iov, offset, nob); - - if (rc != 0) { - CERROR ("Can't map RDMA -> %s: %d\n", - libcfs_nid2str(rx->rx_conn->ibc_peer->ibp_nid), - rc); - /* We'll skip the RDMA and complete with failure. 
*/ - status = rc; - nob = 0; - } else { - tx->tx_gl[0] = (struct ib_gather_scatter) { - .address = tx->tx_md.md_addr, - .length = nob, - .key = tx->tx_md.md_lkey, - }; - - tx->tx_sp[0] = (struct ib_send_param) { - .work_request_id = kibnal_ptr2wreqid(tx, 0), - .op = rdma_op, - .gather_list = &tx->tx_gl[0], - .num_gather_entries = 1, - .remote_address = rxmsg->ibm_u.rdma.ibrm_desc.rd_addr, - .rkey = rxmsg->ibm_u.rdma.ibrm_desc.rd_key, - .device_specific = NULL, - .solicited_event = 0, - .signaled = 1, - .immediate_data_valid = 0, - .fence = 0, - .inline_data = 0, - }; - - tx->tx_nsp = 1; - } - } - - txmsg = tx->tx_msg; - - txmsg->ibm_u.completion.ibcm_cookie = rxmsg->ibm_u.rdma.ibrm_cookie; - txmsg->ibm_u.completion.ibcm_status = status; - - kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t)); - - if (status == 0 && nob != 0) { - LASSERT (tx->tx_nsp > 1); - /* RDMA: lntmsg gets finalized when the tx completes. This - * is after the completion message has been sent, which in - * turn is after the RDMA has finished. */ - tx->tx_lntmsg[0] = lntmsg; - } else { - LASSERT (tx->tx_nsp == 1); - /* No RDMA: local completion happens now! */ - CDEBUG(D_NET, "No data: immediate completion\n"); - lnet_finalize (kibnal_data.kib_ni, lntmsg, - status == 0 ? 0 : -EIO); - } - - kibnal_queue_tx(tx, rx->rx_conn); -} - -int -kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kib_msg_t *ibmsg; - kib_tx_t *tx; - int nob; - - /* NB 'private' is different depending on what we're sending.... 
*/ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* Thread context if we're sending payload */ - LASSERT (!in_interrupt() || payload_niov == 0); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - switch (type) { - default: - LBUG(); - return (-EIO); - - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, NULL, - lntmsg->msg_md->md_length); - - return kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, lntmsg, - lntmsg->msg_md->md_niov, - NULL, lntmsg->msg_md->md_iov.kiov, - lntmsg->msg_md->md_length); - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - return kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA, lntmsg, - payload_niov, - payload_iov, payload_kiov, - payload_nob); - } - - /* Send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR ("Can't send %d to %s: tx descs exhausted%s\n", - type, libcfs_nid2str(target.nid), - in_interrupt() ? 
" (intr)" : ""); - return (-ENOMEM); - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_iov, - payload_offset, payload_nob); - - kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, - offsetof(kib_immediate_msg_t, - ibim_payload[payload_nob])); - - /* lntmsg gets finalized when tx completes */ - tx->tx_lntmsg[0] = lntmsg; - - kibnal_launch_tx(tx, target.nid); - return (0); -} - -int -kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - void **new_private) -{ - kib_rx_t *rx = private; - kib_conn_t *conn = rx->rx_conn; - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* Can't block if RDMA completions need normal credits */ - LCONSOLE_ERROR("Dropping message from %s: no buffers free. 
" - "%s is running an old version of LNET that may " - "deadlock if messages wait for buffers)\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return -EDEADLK; - } - - *new_private = private; - return 0; -} - -int -kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kib_rx_t *rx = private; - kib_msg_t *rxmsg = rx->rx_msg; - int msg_nob; - int rc = 0; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt ()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBNAL_MSG_IMMEDIATE: - msg_nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); - if (msg_nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - msg_nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov( - niov, kiov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov( - niov, iov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - - lnet_finalize (ni, lntmsg, 0); - break; - - case IBNAL_MSG_GET_RDMA: - if (lntmsg != NULL) { - /* GET matched: RDMA lntmsg's payload */ - kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0, - rx, lntmsg, - lntmsg->msg_niov, - lntmsg->msg_iov, - lntmsg->msg_kiov, - lntmsg->msg_offset, - lntmsg->msg_len); - } else { - /* GET didn't match anything */ - kibnal_start_active_rdma (IBNAL_MSG_GET_DONE, -ENODATA, - rx, NULL, 0, NULL, NULL, 0, 0); - } - break; - - case IBNAL_MSG_PUT_RDMA: - kibnal_start_active_rdma (IBNAL_MSG_PUT_DONE, 0, rx, lntmsg, - niov, iov, kiov, offset, mlen); - break; - } - - kibnal_post_rx(rx, 1, 0); - return rc; -} - -int -kibnal_thread_start 
(int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kibnal_data.kib_nthreads); - return (0); -} - -void -kibnal_thread_fini (void) -{ - atomic_dec (&kibnal_data.kib_nthreads); -} - -void -kibnal_peer_alive (kib_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -void -kibnal_peer_notify (kib_peer_t *peer) -{ - time_t last_alive = 0; - int error = 0; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { - error = peer->ibp_error; - peer->ibp_error = 0; - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive); - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (error != 0) - lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive); -} - -void -kibnal_close_conn_locked (kib_conn_t *conn, int error) -{ - /* This just does the immmediate housekeeping, and schedules the - * connection for the reaper to finish off. - * Caller holds kib_global_lock exclusively in irq context */ - kib_peer_t *peer = conn->ibc_peer; - - CDEBUG (error == 0 ? 
D_NET : D_NETERROR, - "closing conn to %s: error %d\n", - libcfs_nid2str(peer->ibp_nid), error); - - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED || - conn->ibc_state == IBNAL_CONN_CONNECTING); - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - /* kib_reaper_conns takes ibc_list's ref */ - list_del (&conn->ibc_list); - } else { - /* new ref for kib_reaper_conns */ - kibnal_conn_addref(conn); - } - - if (list_empty (&peer->ibp_conns)) { /* no more conns */ - if (peer->ibp_persistence == 0 && /* non-persistent peer */ - kibnal_peer_active(peer)) /* still in peer table */ - kibnal_unlink_peer_locked (peer); - - peer->ibp_error = error; /* set/clear error on last conn */ - } - - conn->ibc_state = IBNAL_CONN_DEATHROW; - - /* Schedule conn for closing/destruction */ - spin_lock (&kibnal_data.kib_reaper_lock); - - list_add_tail (&conn->ibc_list, &kibnal_data.kib_reaper_conns); - wake_up (&kibnal_data.kib_reaper_waitq); - - spin_unlock (&kibnal_data.kib_reaper_lock); -} - -int -kibnal_close_conn (kib_conn_t *conn, int why) -{ - unsigned long flags; - int count = 0; - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING); - - if (conn->ibc_state <= IBNAL_CONN_ESTABLISHED) { - count = 1; - kibnal_close_conn_locked (conn, why); - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return (count); -} - -void -kibnal_peer_connect_failed (kib_peer_t *peer, int active, int error) -{ - LIST_HEAD (zombies); - unsigned long flags; - - LASSERT(error != 0); - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (active) { - LASSERT (peer->ibp_connecting != 0); - peer->ibp_connecting--; - } else { - LASSERT (peer->ibp_accepting != 0); - peer->ibp_accepting--; - } - - if (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0) { - /* another connection attempt under way... 
*/ - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - return; - } - - if (list_empty(&peer->ibp_conns)) { - /* Say when active connection can be re-attempted */ - peer->ibp_reconnect_interval *= 2; - peer->ibp_reconnect_interval = - MAX(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_min_reconnect_interval); - peer->ibp_reconnect_interval = - MIN(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_max_reconnect_interval); - - peer->ibp_reconnect_time = jiffies + - peer->ibp_reconnect_interval * HZ; - - /* Take peer's blocked transmits; I'll complete - * them with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kibnal_peer_active(peer) && - (peer->ibp_persistence == 0)) { - /* failed connection attempt on non-persistent peer */ - kibnal_unlink_peer_locked (peer); - } - - peer->ibp_error = error; - } else { - /* Can't have blocked transmits if there are connections */ - LASSERT (list_empty(&peer->ibp_tx_queue)); - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_peer_notify(peer); - - if (!list_empty (&zombies)) - CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_txlist_done(&zombies, -EHOSTUNREACH); -} - -void -kibnal_connreq_done (kib_conn_t *conn, int active, int status) -{ - int state = conn->ibc_state; - kib_peer_t *peer = conn->ibc_peer; - kib_tx_t *tx; - unsigned long flags; - int rc; - int i; - - if (conn->ibc_connreq != NULL) { - LIBCFS_FREE (conn->ibc_connreq, sizeof (*conn->ibc_connreq)); - conn->ibc_connreq = NULL; - } - - switch (state) { - case IBNAL_CONN_CONNECTING: - /* conn has a CM comm_id */ - if (status == 0) { - /* Install common (active/passive) callback for - * disconnect/idle notification */ - rc = tsIbCmCallbackModify(conn->ibc_comm_id, - kibnal_conn_callback, - conn); - LASSERT (rc == 0); - } else { - /* LASSERT (no more CM callbacks) */ - rc = 
tsIbCmCallbackModify(conn->ibc_comm_id, - kibnal_bad_conn_callback, - conn); - LASSERT (rc == 0); - } - break; - - case IBNAL_CONN_INIT_QP: - LASSERT (status != 0); - break; - - default: - LBUG(); - } - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - if (active) - LASSERT (peer->ibp_connecting != 0); - else - LASSERT (peer->ibp_accepting != 0); - - if (status == 0 && /* connection established */ - kibnal_peer_active(peer)) { /* peer not deleted */ - - if (active) - peer->ibp_connecting--; - else - peer->ibp_accepting--; - - conn->ibc_last_send = jiffies; - conn->ibc_state = IBNAL_CONN_ESTABLISHED; - kibnal_peer_alive(peer); - - /* +1 ref for ibc_list; caller(== CM)'s ref remains until - * the IB_CM_IDLE callback */ - kibnal_conn_addref(conn); - list_add (&conn->ibc_list, &peer->ibp_conns); - - peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */ - - /* post blocked sends to the new connection */ - spin_lock (&conn->ibc_lock); - - while (!list_empty (&peer->ibp_tx_queue)) { - tx = list_entry (peer->ibp_tx_queue.next, - kib_tx_t, tx_list); - - list_del (&tx->tx_list); - - kibnal_queue_tx_locked (tx, conn); - } - - spin_unlock (&conn->ibc_lock); - - /* Nuke any dangling conns from a different peer instance... */ - kibnal_close_stale_conns_locked (conn->ibc_peer, - conn->ibc_incarnation); - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - /* queue up all the receives */ - for (i = 0; i < IBNAL_RX_MSGS; i++) { - /* +1 ref for rx desc */ - kibnal_conn_addref(conn); - - CDEBUG(D_NET, "RX[%d] %p->%p - "LPX64"\n", - i, &conn->ibc_rxs[i], conn->ibc_rxs[i].rx_msg, - conn->ibc_rxs[i].rx_vaddr); - - kibnal_post_rx (&conn->ibc_rxs[i], 0, 0); - } - - kibnal_check_sends (conn); - return; - } - - if (status == 0) { - /* connection established, but peer was deleted. Schedule for - * reaper to cm_disconnect... 
*/ - status = -ECONNABORTED; - kibnal_close_conn_locked (conn, status); - } else { - /* just waiting for refs to drain */ - conn->ibc_state = IBNAL_CONN_ZOMBIE; - } - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - kibnal_peer_connect_failed (conn->ibc_peer, active, status); -} - -int -kibnal_accept_connreq (kib_conn_t **connp, tTS_IB_CM_COMM_ID cid, - kib_msg_t *msg, int nob) -{ - kib_conn_t *conn; - kib_peer_t *peer; - kib_peer_t *peer2; - unsigned long flags; - int rc; - - rc = kibnal_unpack_msg(msg, 0, nob); - if (rc != 0) { - CERROR("Can't unpack connreq msg: %d\n", rc); - return -EPROTO; - } - - CDEBUG(D_NET, "connreq from %s\n", libcfs_nid2str(msg->ibm_srcnid)); - - if (msg->ibm_type != IBNAL_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - msg->ibm_type, libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) { - CERROR("Can't accept %s: bad queue depth %d (%d expected)\n", - libcfs_nid2str(msg->ibm_srcnid), - msg->ibm_u.connparams.ibcp_queue_depth, - IBNAL_MSG_QUEUE_SIZE); - return (-EPROTO); - } - - conn = kibnal_create_conn(); - if (conn == NULL) - return (-ENOMEM); - - /* assume 'nid' is a new peer */ - rc = kibnal_create_peer(&peer, msg->ibm_srcnid); - if (rc != 0) { - kibnal_conn_decref(conn); - return (-ENOMEM); - } - - write_lock_irqsave (&kibnal_data.kib_global_lock, flags); - - /* Check I'm the same instance that gave the connection parameters. 
- * NB If my incarnation changes after this, the peer will get nuked and - * we'll spot that when the connection is finally added into the peer's - * connlist */ - if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - CERROR("Stale connection params from %s\n", - libcfs_nid2str(msg->ibm_srcnid)); - kibnal_conn_decref(conn); - kibnal_peer_decref(peer); - return -ESTALE; - } - - peer2 = kibnal_find_peer_locked(msg->ibm_srcnid); - if (peer2 == NULL) { - /* Brand new peer */ - LASSERT (peer->ibp_accepting == 0); - - /* peer table takes my ref on peer */ - list_add_tail (&peer->ibp_list, - kibnal_nid2peerlist(msg->ibm_srcnid)); - } else { - /* tie-break connection race in favour of the higher NID */ - if (peer2->ibp_connecting != 0 && - msg->ibm_srcnid < kibnal_data.kib_ni->ni_nid) { - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - CWARN("Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); - - kibnal_conn_decref(conn); - kibnal_peer_decref(peer); - return -EALREADY; - } - - kibnal_peer_decref(peer); - peer = peer2; - } - - /* +1 ref for conn */ - kibnal_peer_addref(peer); - peer->ibp_accepting++; - - write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags); - - conn->ibc_peer = peer; - conn->ibc_state = IBNAL_CONN_CONNECTING; - conn->ibc_comm_id = cid; - conn->ibc_incarnation = msg->ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_version = msg->ibm_version; - - *connp = conn; - return (0); -} - -tTS_IB_CM_CALLBACK_RETURN -kibnal_bad_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) -{ - CERROR ("Unexpected event %d: conn %p\n", event, arg); - LBUG (); - return TS_IB_CM_CALLBACK_PROCEED; -} - -void -kibnal_abort_txs (kib_conn_t *conn, struct list_head *txs) -{ - LIST_HEAD (zombies); - 
struct list_head *tmp; - struct list_head *nxt; - kib_tx_t *tx; - unsigned long flags; - - spin_lock_irqsave (&conn->ibc_lock, flags); - - list_for_each_safe (tmp, nxt, txs) { - tx = list_entry (tmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (tx->tx_passive_rdma || - !tx->tx_passive_rdma_wait); - - LASSERT (tx->tx_passive_rdma_wait || - tx->tx_sending != 0); - } else { - LASSERT (!tx->tx_passive_rdma_wait); - LASSERT (tx->tx_sending == 0); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_passive_rdma_wait = 0; - - if (tx->tx_sending == 0) { - list_del (&tx->tx_list); - list_add (&tx->tx_list, &zombies); - } - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - - kibnal_txlist_done (&zombies, -ECONNABORTED); -} - -tTS_IB_CM_CALLBACK_RETURN -kibnal_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) -{ - kib_conn_t *conn = arg; - int rc; - - /* Established Connection Notifier */ - - switch (event) { - default: - CDEBUG(D_NETERROR, "Connection %p -> %s ERROR %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event); - kibnal_close_conn (conn, -ECONNABORTED); - break; - - case TS_IB_CM_DISCONNECTED: - CDEBUG(D_NETERROR, "Connection %p -> %s DISCONNECTED.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_close_conn (conn, 0); - break; - - case TS_IB_CM_IDLE: - CDEBUG(D_NET, "Connection %p -> %s IDLE.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - /* LASSERT (no further callbacks) */ - rc = tsIbCmCallbackModify(cid, kibnal_bad_conn_callback, conn); - LASSERT (rc == 0); - - /* NB we wait until the connection has closed before - * completing outstanding passive RDMAs so we can be sure - * the network can't touch the mapped memory any more. 
*/ - - kibnal_abort_txs(conn, &conn->ibc_tx_queue); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_nocred); - kibnal_abort_txs(conn, &conn->ibc_active_txs); - - kibnal_conn_decref(conn); /* Lose CM's ref */ - break; - } - - return TS_IB_CM_CALLBACK_PROCEED; -} - -tTS_IB_CM_CALLBACK_RETURN -kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) -{ - kib_conn_t *conn = arg; - int rc; - - switch (event) { - default: - if (conn == NULL) { - /* no connection yet */ - CERROR ("Unexpected event: %d\n", event); - return TS_IB_CM_CALLBACK_ABORT; - } - - CERROR ("%s event %p -> %s: %d\n", - (event == TS_IB_CM_IDLE) ? "IDLE" : "Unexpected", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event); - kibnal_connreq_done(conn, 0, -ECONNABORTED); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - - case TS_IB_CM_REQ_RECEIVED: { - struct ib_cm_req_received_param *req = param; - kib_msg_t *msg = req->remote_private_data; - - LASSERT (conn == NULL); - - /* Don't really know srcnid until successful unpack */ - CDEBUG(D_NET, "REQ from ?%s?\n", libcfs_nid2str(msg->ibm_srcnid)); - - rc = kibnal_accept_connreq(&conn, cid, msg, - req->remote_private_data_len); - if (rc != 0) { - CERROR ("Can't accept ?%s?: %d\n", - libcfs_nid2str(msg->ibm_srcnid), rc); - return TS_IB_CM_CALLBACK_ABORT; - } - - /* update 'arg' for next callback */ - rc = tsIbCmCallbackModify(cid, kibnal_passive_conn_callback, conn); - LASSERT (rc == 0); - - msg = req->accept_param.reply_private_data; - kibnal_init_msg(msg, IBNAL_MSG_CONNACK, - sizeof(msg->ibm_u.connparams)); - - msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - - kibnal_pack_msg(msg, conn->ibc_version, 0, - conn->ibc_peer->ibp_nid, - conn->ibc_incarnation); - - req->accept_param.qp = conn->ibc_qp; - req->accept_param.reply_private_data_len = msg->ibm_nob; - req->accept_param.responder_resources = 
IBNAL_RESPONDER_RESOURCES; - req->accept_param.initiator_depth = IBNAL_RESPONDER_RESOURCES; - req->accept_param.rnr_retry_count = IBNAL_RNR_RETRY; - req->accept_param.flow_control = IBNAL_FLOW_CONTROL; - - CDEBUG(D_NET, "Proceeding\n"); - return TS_IB_CM_CALLBACK_PROCEED; /* CM takes my ref on conn */ - } - - case TS_IB_CM_ESTABLISHED: - LASSERT (conn != NULL); - CWARN("Connection %p -> %s ESTABLISHED.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_connreq_done(conn, 0, 0); - return TS_IB_CM_CALLBACK_PROCEED; - } -} - -tTS_IB_CM_CALLBACK_RETURN -kibnal_active_conn_callback (tTS_IB_CM_EVENT event, - tTS_IB_CM_COMM_ID cid, - void *param, - void *arg) -{ - kib_conn_t *conn = arg; - unsigned long flags; - - switch (event) { - case TS_IB_CM_REP_RECEIVED: { - struct ib_cm_rep_received_param *rep = param; - kib_msg_t *msg = rep->remote_private_data; - int nob = rep->remote_private_data_len; - int rc; - - rc = kibnal_unpack_msg(msg, conn->ibc_version, nob); - if (rc != 0) { - CERROR ("Error %d unpacking conn ack from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, 1, rc); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } - - if (msg->ibm_type != IBNAL_MSG_CONNACK) { - CERROR ("Unexpected conn ack type %d from %s\n", - msg->ibm_type, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, 1, -EPROTO); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } - - if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid, - msg->ibm_srcnid) || - !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR("Stale conn ack from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, 1, -ESTALE); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } - - if 
(msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) { - CERROR ("Bad queue depth %d from %s\n", - msg->ibm_u.connparams.ibcp_queue_depth, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_connreq_done(conn, 1, -EPROTO); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } - - CDEBUG(D_NET, "Connection %p -> %s REP_RECEIVED.\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - return TS_IB_CM_CALLBACK_PROCEED; - } - - case TS_IB_CM_ESTABLISHED: - CWARN("Connection %p -> %s ESTABLISHED\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_connreq_done(conn, 1, 0); - return TS_IB_CM_CALLBACK_PROCEED; - - case TS_IB_CM_IDLE: - CDEBUG(D_NETERROR, "Connection %p -> %s IDLE\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* I assume this connection attempt was rejected because the - * peer found a stale QP; I'll just try again */ - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - kibnal_schedule_active_connect_locked(conn->ibc_peer); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_connreq_done(conn, 1, -ECONNABORTED); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - - default: - CDEBUG(D_NETERROR, "Connection %p -> %s ERROR %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), event); - kibnal_connreq_done(conn, 1, -ECONNABORTED); - kibnal_conn_decref(conn); /* drop CM's ref */ - return TS_IB_CM_CALLBACK_ABORT; - } -} - -int -kibnal_pathreq_callback (tTS_IB_CLIENT_QUERY_TID tid, int status, - struct ib_path_record *resp, int remaining, - void *arg) -{ - kib_conn_t *conn = arg; - kib_peer_t *peer = conn->ibc_peer; - kib_msg_t *msg = &conn->ibc_connreq->cr_msg; - - if (status != 0) { - CDEBUG (D_NETERROR, "Pathreq %p -> %s failed: %d\n", - conn, libcfs_nid2str(peer->ibp_nid), status); - kibnal_connreq_done(conn, 1, status); - 
kibnal_conn_decref(conn); /* drop callback's ref */ - return 1; /* non-zero prevents further callbacks */ - } - - conn->ibc_connreq->cr_path = *resp; - - kibnal_init_msg(msg, IBNAL_MSG_CONNREQ, sizeof(msg->ibm_u.connparams)); - msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - kibnal_pack_msg(msg, conn->ibc_version, 0, - peer->ibp_nid, conn->ibc_incarnation); - - conn->ibc_connreq->cr_connparam = (struct ib_cm_active_param) { - .qp = conn->ibc_qp, - .req_private_data = msg, - .req_private_data_len = msg->ibm_nob, - .responder_resources = IBNAL_RESPONDER_RESOURCES, - .initiator_depth = IBNAL_RESPONDER_RESOURCES, - .retry_count = IBNAL_RETRY, - .rnr_retry_count = IBNAL_RNR_RETRY, - .cm_response_timeout = *kibnal_tunables.kib_timeout, - .max_cm_retries = IBNAL_CM_RETRY, - .flow_control = IBNAL_FLOW_CONTROL, - }; - - /* XXX set timeout just like SDP!!!*/ - conn->ibc_connreq->cr_path.packet_life = 13; - - /* Flag I'm getting involved with the CM... */ - conn->ibc_state = IBNAL_CONN_CONNECTING; - - CDEBUG(D_NET, "Connecting to, service id "LPX64", on %s\n", - conn->ibc_connreq->cr_svcrsp.ibsr_svc_id, - libcfs_nid2str(peer->ibp_nid)); - - /* kibnal_connect_callback gets my conn ref */ - status = ib_cm_connect (&conn->ibc_connreq->cr_connparam, - &conn->ibc_connreq->cr_path, NULL, - conn->ibc_connreq->cr_svcrsp.ibsr_svc_id, 0, - kibnal_active_conn_callback, conn, - &conn->ibc_comm_id); - if (status != 0) { - CERROR ("Connect %p -> %s failed: %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), status); - /* Back out state change: I've not got a CM comm_id yet... 
*/ - conn->ibc_state = IBNAL_CONN_INIT_QP; - kibnal_connreq_done(conn, 1, status); - kibnal_conn_decref(conn); /* Drop callback's ref */ - } - - return 1; /* non-zero to prevent further callbacks */ -} - -void -kibnal_connect_peer (kib_peer_t *peer) -{ - kib_conn_t *conn; - int rc; - - conn = kibnal_create_conn(); - if (conn == NULL) { - CERROR ("Can't allocate conn\n"); - kibnal_peer_connect_failed (peer, 1, -ENOMEM); - return; - } - - conn->ibc_peer = peer; - kibnal_peer_addref(peer); - - LIBCFS_ALLOC (conn->ibc_connreq, sizeof (*conn->ibc_connreq)); - if (conn->ibc_connreq == NULL) { - CERROR ("Can't allocate connreq\n"); - kibnal_connreq_done(conn, 1, -ENOMEM); - kibnal_conn_decref(conn); /* drop my ref */ - return; - } - - memset(conn->ibc_connreq, 0, sizeof (*conn->ibc_connreq)); - - rc = kibnal_make_svcqry(conn); - if (rc != 0) { - kibnal_connreq_done (conn, 1, rc); - kibnal_conn_decref(conn); /* drop my ref */ - return; - } - - rc = ib_cached_gid_get(kibnal_data.kib_device, - kibnal_data.kib_port, 0, - conn->ibc_connreq->cr_gid); - LASSERT (rc == 0); - - /* kibnal_pathreq_callback gets my conn ref */ - rc = tsIbPathRecordRequest (kibnal_data.kib_device, - kibnal_data.kib_port, - conn->ibc_connreq->cr_gid, - conn->ibc_connreq->cr_svcrsp.ibsr_svc_gid, - conn->ibc_connreq->cr_svcrsp.ibsr_svc_pkey, - 0, - *kibnal_tunables.kib_timeout * HZ, - 0, - kibnal_pathreq_callback, conn, - &conn->ibc_connreq->cr_tid); - if (rc == 0) - return; /* callback now has my ref on conn */ - - CERROR ("Path record request %p -> %s failed: %d\n", - conn, libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kibnal_connreq_done(conn, 1, rc); - kibnal_conn_decref(conn); /* drop my ref */ -} - -int -kibnal_check_txs (kib_conn_t *conn, struct list_head *txs) -{ - kib_tx_t *tx; - struct list_head *ttmp; - unsigned long flags; - int timed_out = 0; - - spin_lock_irqsave (&conn->ibc_lock, flags); - - list_for_each (ttmp, txs) { - tx = list_entry (ttmp, kib_tx_t, tx_list); - - if (txs == 
&conn->ibc_active_txs) { - LASSERT (tx->tx_passive_rdma || - !tx->tx_passive_rdma_wait); - - LASSERT (tx->tx_passive_rdma_wait || - tx->tx_sending != 0); - } else { - LASSERT (!tx->tx_passive_rdma_wait); - LASSERT (tx->tx_sending == 0); - } - - if (time_after_eq (jiffies, tx->tx_deadline)) { - timed_out = 1; - break; - } - } - - spin_unlock_irqrestore (&conn->ibc_lock, flags); - return timed_out; -} - -int -kibnal_conn_timed_out (kib_conn_t *conn) -{ - return kibnal_check_txs(conn, &conn->ibc_tx_queue) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) || - kibnal_check_txs(conn, &conn->ibc_active_txs); -} - -void -kibnal_check_conns (int idx) -{ - struct list_head *peers = &kibnal_data.kib_peers[idx]; - struct list_head *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - - again: - /* NB. We expect to have a look at all the peers and not find any - * rdmas to time out, so we just use a shared lock while we - * take a look... */ - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - - list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - - - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... 
*/ - kibnal_check_sends(conn); - - if (!kibnal_conn_timed_out(conn)) - continue; - - kibnal_conn_addref(conn); - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - - CERROR("Timed out RDMA with %s\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_close_conn (conn, -ETIMEDOUT); - kibnal_conn_decref(conn); - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_terminate_conn (kib_conn_t *conn) -{ - int rc; - - CDEBUG(D_NET, "conn %p\n", conn); - LASSERT (conn->ibc_state == IBNAL_CONN_DEATHROW); - conn->ibc_state = IBNAL_CONN_ZOMBIE; - - rc = ib_cm_disconnect (conn->ibc_comm_id); - if (rc != 0) - CERROR ("Error %d disconnecting conn %p -> %s\n", - rc, conn, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_peer_notify(conn->ibc_peer); -} - -int -kibnal_reaper (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - kib_conn_t *conn; - int timeout; - int i; - int peer_index = 0; - unsigned long deadline = jiffies; - - cfs_daemonize ("kibnal_reaper"); - cfs_block_allsigs (); - - init_waitqueue_entry (&wait, current); - - spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags); - - while (!kibnal_data.kib_shutdown) { - if (!list_empty (&kibnal_data.kib_reaper_conns)) { - conn = list_entry (kibnal_data.kib_reaper_conns.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags); - - switch (conn->ibc_state) { - case IBNAL_CONN_DEATHROW: - LASSERT (conn->ibc_comm_id != TS_IB_CM_COMM_ID_INVALID); - /* Disconnect: conn becomes a zombie in the - * callback and last ref reschedules it - * here... 
*/ - kibnal_terminate_conn(conn); - kibnal_conn_decref(conn); - break; - - case IBNAL_CONN_INIT_QP: - case IBNAL_CONN_ZOMBIE: - kibnal_destroy_conn (conn); - break; - - default: - CERROR ("Bad conn %p state: %d\n", - conn, conn->ibc_state); - LBUG(); - } - - spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags); - continue; - } - - spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags); - - /* careful with the jiffy wrap... */ - while ((timeout = (int)(deadline - jiffies)) <= 0) { - const int n = 4; - const int p = 1; - int chunk = kibnal_data.kib_peer_hash_size; - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. */ - - if (*kibnal_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kibnal_tunables.kib_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kibnal_check_conns (peer_index); - peer_index = (peer_index + 1) % - kibnal_data.kib_peer_hash_size; - } - - deadline += p * HZ; - } - - kibnal_data.kib_reaper_waketime = jiffies + timeout; - - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&kibnal_data.kib_reaper_waitq, &wait); - - schedule_timeout (timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kibnal_data.kib_reaper_waitq, &wait); - - spin_lock_irqsave (&kibnal_data.kib_reaper_lock, flags); - } - - spin_unlock_irqrestore (&kibnal_data.kib_reaper_lock, flags); - - kibnal_thread_fini (); - return (0); -} - -int -kibnal_connd (void *arg) -{ - long id = (long)arg; - char name[16]; - wait_queue_t wait; - unsigned long flags; - kib_peer_t *peer; - kib_acceptsock_t *as; - int did_something; - - snprintf(name, sizeof(name), "kibnal_connd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry (&wait, current); - - 
spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - while (!kibnal_data.kib_shutdown) { - did_something = 0; - - if (!list_empty (&kibnal_data.kib_connd_acceptq)) { - as = list_entry (kibnal_data.kib_connd_acceptq.next, - kib_acceptsock_t, ibas_list); - list_del (&as->ibas_list); - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_handle_svcqry(as->ibas_sock); - kibnal_free_acceptsock(as); - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - did_something = 1; - } - - /* Only handle an outgoing connection request if there is someone left - * to handle an incoming svcqry */ - if (!list_empty (&kibnal_data.kib_connd_peers) && - ((kibnal_data.kib_connd_connecting + 1) < - *kibnal_tunables.kib_n_connd)) { - peer = list_entry (kibnal_data.kib_connd_peers.next, - kib_peer_t, ibp_connd_list); - - list_del_init (&peer->ibp_connd_list); - kibnal_data.kib_connd_connecting++; - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_connect_peer (peer); - kibnal_peer_decref(peer); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - did_something = 1; - kibnal_data.kib_connd_connecting--; - } - - if (did_something) - continue; - - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kibnal_data.kib_connd_waitq, &wait); - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - schedule(); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_thread_fini (); - return (0); -} - -int -kibnal_scheduler(void *arg) -{ - long id = (long)arg; - char name[16]; - kib_rx_t *rx; - kib_tx_t *tx; - unsigned long flags; - int rc; - int counter = 0; - int did_something; - - snprintf(name, sizeof(name), "kibnal_sd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, 
flags); - - while (!kibnal_data.kib_shutdown) { - did_something = 0; - - while (!list_empty(&kibnal_data.kib_sched_txq)) { - tx = list_entry(kibnal_data.kib_sched_txq.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - kibnal_tx_done(tx); - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, - flags); - } - - if (!list_empty(&kibnal_data.kib_sched_rxq)) { - rx = list_entry(kibnal_data.kib_sched_rxq.next, - kib_rx_t, rx_list); - list_del(&rx->rx_list); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - kibnal_rx(rx); - - did_something = 1; - spin_lock_irqsave(&kibnal_data.kib_sched_lock, - flags); - } - - /* nothing to do or hogging CPU */ - if (!did_something || counter++ == IBNAL_RESCHED) { - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - counter = 0; - - if (!did_something) { - rc = wait_event_interruptible_exclusive( - kibnal_data.kib_sched_waitq, - !list_empty(&kibnal_data.kib_sched_txq) || - !list_empty(&kibnal_data.kib_sched_rxq) || - kibnal_data.kib_shutdown); - } else { - our_cond_resched(); - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, - flags); - } - } - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - - kibnal_thread_fini(); - return (0); -} diff --git a/lnet/klnds/openiblnd/openiblnd_modparams.c b/lnet/klnds/openiblnd/openiblnd_modparams.c deleted file mode 100644 index f40004b322a2de1bff5bc67906730c4656ee882f..0000000000000000000000000000000000000000 --- a/lnet/klnds/openiblnd/openiblnd_modparams.c +++ /dev/null @@ -1,149 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "openiblnd.h" - -static char *ipif_basename = "ib"; -CFS_MODULE_PARM(ipif_basename, "s", charp, 0444, - "IPoIB interface base name"); - -static int n_connd = 4; -CFS_MODULE_PARM(n_connd, "i", int, 0444, - "# of connection daemons"); - -static int min_reconnect_interval = 1; -CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644, - "minimum connection retry interval (seconds)"); - -static int max_reconnect_interval = 60; -CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644, - "maximum connection retry interval (seconds)"); - -static int concurrent_peers = 1152; -CFS_MODULE_PARM(concurrent_peers, "i", int, 0444, - "maximum number of peers that may connect"); - -static int cksum = 0; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int ntx = 384; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of message descriptors"); - -static int credits = 256; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 16; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int keepalive = 100; -CFS_MODULE_PARM(keepalive, "i", int, 0644, - "Idle time in seconds before sending a keepalive"); - -kib_tunables_t 
kibnal_tunables = { - .kib_ipif_basename = &ipif_basename, - .kib_n_connd = &n_connd, - .kib_min_reconnect_interval = &min_reconnect_interval, - .kib_max_reconnect_interval = &max_reconnect_interval, - .kib_concurrent_peers = &concurrent_peers, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peercredits = &peer_credits, - .kib_keepalive = &keepalive, -}; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - -static ctl_table kibnal_ctl_table[] = { - {1, "ipif_basename", &ipif_basename, - 1024, 0444, NULL, &proc_dostring}, - {2, "n_connd", &n_connd, - sizeof(int), 0444, NULL, &proc_dointvec}, - {3, "min_reconnect_interval", &min_reconnect_interval, - sizeof(int), 0644, NULL, &proc_dointvec}, - {4, "max_reconnect_interval", &max_reconnect_interval, - sizeof(int), 0644, NULL, &proc_dointvec}, - {5, "concurrent_peers", &concurrent_peers, - sizeof(int), 0444, NULL, &proc_dointvec}, - {6, "cksum", &cksum, - sizeof(int), 0644, NULL, &proc_dointvec}, - {7, "timeout", &timeout, - sizeof(int), 0644, NULL, &proc_dointvec}, - {8, "ntx", &ntx, - sizeof(int), 0444, NULL, &proc_dointvec}, - {9, "credits", &credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {10, "peer_credits", &peer_credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {11, "keepalive", &keepalive, - sizeof(int), 0644, NULL, &proc_dointvec}, - {0} -}; - -static ctl_table kibnal_top_ctl_table[] = { - {203, "openibnal", NULL, 0, 0555, kibnal_ctl_table}, - {0} -}; - -int -kibnal_tunables_init () -{ - kibnal_tunables.kib_sysctl = - register_sysctl_table(kibnal_top_ctl_table, 0); - - if (kibnal_tunables.kib_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kibnal_tunables_fini () -{ - if (kibnal_tunables.kib_sysctl != NULL) - unregister_sysctl_table(kibnal_tunables.kib_sysctl); -} - -#else - -int -kibnal_tunables_init () -{ - return 0; -} - -void -kibnal_tunables_fini () -{ -} - -#endif diff --git a/lnet/klnds/ptllnd/.cvsignore 
b/lnet/klnds/ptllnd/.cvsignore deleted file mode 100644 index 0586565dc9fc3efe66aef7b2989e5f29b8823c37..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.*.cmd -.tmp_versions -.depend -wirecheck diff --git a/lnet/klnds/ptllnd/Makefile.in b/lnet/klnds/ptllnd/Makefile.in deleted file mode 100755 index ec2f9bb1544ba61171435411bdd9df47c459134d..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/Makefile.in +++ /dev/null @@ -1,13 +0,0 @@ -MODULES := kptllnd - -EXTRA_POST_CFLAGS := @PTLLNDCPPFLAGS@ - -kptllnd-objs := ptllnd.o \ - ptllnd_cb.o \ - ptllnd_modparams.o \ - ptllnd_peer.o \ - ptllnd_rx_buf.o \ - ptllnd_tx.o \ - ptllnd_ptltrace.o - -@INCLUDE_RULES@ diff --git a/lnet/klnds/ptllnd/README b/lnet/klnds/ptllnd/README deleted file mode 100644 index 5cb6cfcafe7455c9f722b87b2ad7f637362e2054..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/README +++ /dev/null @@ -1,47 +0,0 @@ -1. This version of the Portals LND is intended to work on the Cray XT3 using - Cray Portals as a network transport. - -2. To enable the building of the Portals LND (ptllnd.ko) configure with the - following option: - ./configure --with-portals=<path-to-portals-headers> - -3. The following configuration options are supported - - ntx: - The total number of message descritprs - - concurrent_peers: - The maximum number of conncurent peers. Peers attemting - to connect beyond the maximum will not be allowd. - - peer_hash_table_size: - The number of hash table slots for the peers. This number - should scale with concurrent_peers. - - cksum: - Set to non-zero to enable message (not RDMA) checksums for - outgoing packets. Incoming packets will always be checksumed - if necssary, independnt of this value. - - timeout: - The amount of time a request can linger in a peers active - queue, before the peer is considered dead. Units: seconds. 
- - portal: - The portal ID to use for the ptllnd traffic. - - rxb_npages: - The number of pages in a RX Buffer. - - credits: - The maximum total number of concurrent sends that are - outstanding at any given instant. - - peercredits: - The maximum number of concurrent sends that are - outstanding to a single piere at any given instant. - - max_msg_size: - The maximum immedate message size. This MUST be - the same on all nodes in a cluster. A peer connecting - with a diffrent max_msg_size will be rejected. diff --git a/lnet/klnds/ptllnd/autoMakefile.am b/lnet/klnds/ptllnd/autoMakefile.am deleted file mode 100755 index bd8cc9c81740cb8310c6846267271a7c619b909d..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/autoMakefile.am +++ /dev/null @@ -1,8 +0,0 @@ -if MODULES -if BUILD_PTLLND -modulenet_DATA = kptllnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kptllnd-objs:%.o=%.c) ptllnd.h diff --git a/lnet/klnds/ptllnd/ptllnd.c b/lnet/klnds/ptllnd/ptllnd.c deleted file mode 100755 index f9361f900d0dc8a74ae2d198d31c3fb88b726489..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd.c +++ /dev/null @@ -1,883 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - -#include "ptllnd.h" - -lnd_t kptllnd_lnd = { - .lnd_type = PTLLND, - .lnd_startup = kptllnd_startup, - .lnd_shutdown = kptllnd_shutdown, - .lnd_ctl = kptllnd_ctl, - .lnd_send = kptllnd_send, - .lnd_recv = kptllnd_recv, - .lnd_eager_recv = kptllnd_eager_recv, -}; - -kptl_data_t kptllnd_data; - -char * -kptllnd_ptlid2str(ptl_process_id_t id) -{ - static char strs[64][32]; - static int idx = 0; - - unsigned long flags; - char *str; - - spin_lock_irqsave(&kptllnd_data.kptl_ptlid2str_lock, flags); - str = strs[idx++]; - if (idx >= sizeof(strs)/sizeof(strs[0])) - idx = 0; - spin_unlock_irqrestore(&kptllnd_data.kptl_ptlid2str_lock, flags); - - snprintf(str, sizeof(strs[0]), FMT_PTLID, id.pid, id.nid); - return str; -} - -void -kptllnd_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux fedora 2.6.11-co-0.6.4 #1 Mon Jun 19 05:36:13 UTC 2006 i686 i686 i386 GNU - * with gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) */ - - - /* Constants... 
*/ - CLASSERT (PTL_RESERVED_MATCHBITS == 0x100); - CLASSERT (LNET_MSG_MATCHBITS == 0); - CLASSERT (PTLLND_MSG_MAGIC == 0x50746C4E); - CLASSERT (PTLLND_MSG_VERSION == 0x04); - CLASSERT (PTLLND_RDMA_OK == 0x00); - CLASSERT (PTLLND_RDMA_FAIL == 0x01); - CLASSERT (PTLLND_MSG_TYPE_INVALID == 0x00); - CLASSERT (PTLLND_MSG_TYPE_PUT == 0x01); - CLASSERT (PTLLND_MSG_TYPE_GET == 0x02); - CLASSERT (PTLLND_MSG_TYPE_IMMEDIATE == 0x03); - CLASSERT (PTLLND_MSG_TYPE_NOOP == 0x04); - CLASSERT (PTLLND_MSG_TYPE_HELLO == 0x05); - CLASSERT (PTLLND_MSG_TYPE_NAK == 0x06); - - /* Checks for struct kptl_msg_t */ - CLASSERT ((int)sizeof(kptl_msg_t) == 136); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_magic) == 0); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_magic) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_version) == 4); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_version) == 2); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_type) == 6); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_type) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_credits) == 7); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_credits) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_nob) == 8); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_nob) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_cksum) == 12); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_cksum) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcnid) == 16); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcstamp) == 24); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcstamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstnid) == 32); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dststamp) == 40); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dststamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcpid) == 48); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcpid) == 4); - CLASSERT 
((int)offsetof(kptl_msg_t, ptlm_dstpid) == 52); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstpid) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.immediate) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.immediate) == 72); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.rdma) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.rdma) == 80); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.hello) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.hello) == 12); - - /* Checks for struct kptl_immediate_msg_t */ - CLASSERT ((int)sizeof(kptl_immediate_msg_t) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_hdr) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_payload[13]) == 85); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_payload[13]) == 1); - - /* Checks for struct kptl_rdma_msg_t */ - CLASSERT ((int)sizeof(kptl_rdma_msg_t) == 80); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_hdr) == 72); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_matchbits) == 72); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_matchbits) == 8); - - /* Checks for struct kptl_hello_msg_t */ - CLASSERT ((int)sizeof(kptl_hello_msg_t) == 12); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_matchbits) == 0); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_matchbits) == 8); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_max_msg_size) == 8); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_max_msg_size) == 4); -} - -const char *kptllnd_evtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_EVENT_GET_START); - DO_TYPE(PTL_EVENT_GET_END); - DO_TYPE(PTL_EVENT_PUT_START); - DO_TYPE(PTL_EVENT_PUT_END); - DO_TYPE(PTL_EVENT_REPLY_START); - DO_TYPE(PTL_EVENT_REPLY_END); - DO_TYPE(PTL_EVENT_ACK); - DO_TYPE(PTL_EVENT_SEND_START); 
- DO_TYPE(PTL_EVENT_SEND_END); - DO_TYPE(PTL_EVENT_UNLINK); - default: - return "<unknown event type>"; - } -#undef DO_TYPE -} - -const char *kptllnd_msgtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTLLND_MSG_TYPE_INVALID); - DO_TYPE(PTLLND_MSG_TYPE_PUT); - DO_TYPE(PTLLND_MSG_TYPE_GET); - DO_TYPE(PTLLND_MSG_TYPE_IMMEDIATE); - DO_TYPE(PTLLND_MSG_TYPE_HELLO); - DO_TYPE(PTLLND_MSG_TYPE_NOOP); - DO_TYPE(PTLLND_MSG_TYPE_NAK); - default: - return "<unknown msg type>"; - } -#undef DO_TYPE -} - -const char *kptllnd_errtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_OK); - DO_TYPE(PTL_SEGV); - DO_TYPE(PTL_NO_SPACE); - DO_TYPE(PTL_ME_IN_USE); - DO_TYPE(PTL_NAL_FAILED); - DO_TYPE(PTL_NO_INIT); - DO_TYPE(PTL_IFACE_DUP); - DO_TYPE(PTL_IFACE_INVALID); - DO_TYPE(PTL_HANDLE_INVALID); - DO_TYPE(PTL_MD_INVALID); - DO_TYPE(PTL_ME_INVALID); - DO_TYPE(PTL_PROCESS_INVALID); - DO_TYPE(PTL_PT_INDEX_INVALID); - DO_TYPE(PTL_SR_INDEX_INVALID); - DO_TYPE(PTL_EQ_INVALID); - DO_TYPE(PTL_EQ_DROPPED); - DO_TYPE(PTL_EQ_EMPTY); - DO_TYPE(PTL_MD_NO_UPDATE); - DO_TYPE(PTL_FAIL); - DO_TYPE(PTL_AC_INDEX_INVALID); - DO_TYPE(PTL_MD_ILLEGAL); - DO_TYPE(PTL_ME_LIST_TOO_LONG); - DO_TYPE(PTL_MD_IN_USE); - DO_TYPE(PTL_NI_INVALID); - DO_TYPE(PTL_PID_INVALID); - DO_TYPE(PTL_PT_FULL); - DO_TYPE(PTL_VAL_FAILED); - DO_TYPE(PTL_NOT_IMPLEMENTED); - DO_TYPE(PTL_NO_ACK); - DO_TYPE(PTL_EQ_IN_USE); - DO_TYPE(PTL_PID_IN_USE); - DO_TYPE(PTL_INV_EQ_SIZE); - DO_TYPE(PTL_AGAIN); - default: - return "<unknown event type>"; - } -#undef DO_TYPE -} - -__u32 -kptllnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -void -kptllnd_init_msg(kptl_msg_t *msg, int type, int body_nob) -{ - msg->ptlm_type = type; - msg->ptlm_nob = (offsetof(kptl_msg_t, ptlm_u) + body_nob + 7) & ~7; - - LASSERT(msg->ptlm_nob <= *kptllnd_tunables.kptl_max_msg_size); -} - -void -kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer) -{ - msg->ptlm_magic = PTLLND_MSG_MAGIC; - msg->ptlm_version = PTLLND_MSG_VERSION; - /* msg->ptlm_type Filled in kptllnd_init_msg() */ - msg->ptlm_credits = peer->peer_outstanding_credits; - /* msg->ptlm_nob Filled in kptllnd_init_msg() */ - msg->ptlm_cksum = 0; - msg->ptlm_srcnid = kptllnd_data.kptl_ni->ni_nid; - msg->ptlm_srcstamp = peer->peer_myincarnation; - msg->ptlm_dstnid = peer->peer_id.nid; - msg->ptlm_dststamp = peer->peer_incarnation; - msg->ptlm_srcpid = the_lnet.ln_pid; - msg->ptlm_dstpid = peer->peer_id.pid; - - if (*kptllnd_tunables.kptl_checksum) { - /* NB ptlm_cksum zero while computing cksum */ - msg->ptlm_cksum = kptllnd_cksum(msg, - offsetof(kptl_msg_t, ptlm_u)); - } -} - -int -kptllnd_msg_unpack(kptl_msg_t *msg, int nob) -{ - const int hdr_size = offsetof(kptl_msg_t, ptlm_u); - __u32 msg_cksum; - __u16 msg_version; - int flip; - - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Very Short message: %d\n", nob); - return -EPROTO; - } - - /* - * Determine if we need to flip - */ - if (msg->ptlm_magic == PTLLND_MSG_MAGIC) { - flip = 0; - } else if (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ptlm_magic); - return -EPROTO; - } - - msg_version = flip ? 
__swab16(msg->ptlm_version) : msg->ptlm_version; - - if (msg_version != PTLLND_MSG_VERSION) { - CERROR("Bad version: got %04x expected %04x\n", - (__u32)msg_version, PTLLND_MSG_VERSION); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: got %d, wanted at least %d\n", - nob, hdr_size); - return -EPROTO; - } - - /* checksum must be computed with - * 1) ptlm_cksum zero and - * 2) BEFORE anything gets modified/flipped - */ - msg_cksum = flip ? __swab32(msg->ptlm_cksum) : msg->ptlm_cksum; - msg->ptlm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kptllnd_cksum(msg, hdr_size)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - - msg->ptlm_version = msg_version; - msg->ptlm_cksum = msg_cksum; - - if (flip) { - /* These two are 1 byte long so we don't swap them - But check this assumtion*/ - CLASSERT (sizeof(msg->ptlm_type) == 1); - CLASSERT (sizeof(msg->ptlm_credits) == 1); - /* src & dst stamps are opaque cookies */ - __swab32s(&msg->ptlm_nob); - __swab64s(&msg->ptlm_srcnid); - __swab64s(&msg->ptlm_dstnid); - __swab32s(&msg->ptlm_srcpid); - __swab32s(&msg->ptlm_dstpid); - } - - if (msg->ptlm_nob != nob) { - CERROR("msg_nob corrupt: got 0x%08x, wanted %08x\n", - msg->ptlm_nob, nob); - return -EPROTO; - } - - switch(msg->ptlm_type) - { - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - if (nob < hdr_size + sizeof(kptl_rdma_msg_t)) { - CERROR("Short rdma request: got %d, want %d\n", - nob, hdr_size + (int)sizeof(kptl_rdma_msg_t)); - return -EPROTO; - } - - if (flip) - __swab64s(&msg->ptlm_u.rdma.kptlrm_matchbits); - - if (msg->ptlm_u.rdma.kptlrm_matchbits < PTL_RESERVED_MATCHBITS) { - CERROR("Bad matchbits "LPX64"\n", - msg->ptlm_u.rdma.kptlrm_matchbits); - return -EPROTO; - } - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - if (nob < offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload)) { - CERROR("Short immediate: got %d, want %d\n", nob, - (int)offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload)); - return -EPROTO; - } - /* Do 
nothing */ - break; - - case PTLLND_MSG_TYPE_NOOP: - case PTLLND_MSG_TYPE_NAK: - /* Do nothing */ - break; - - case PTLLND_MSG_TYPE_HELLO: - if (nob < hdr_size + sizeof(kptl_hello_msg_t)) { - CERROR("Short hello: got %d want %d\n", - nob, hdr_size + (int)sizeof(kptl_hello_msg_t)); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->ptlm_u.hello.kptlhm_matchbits); - __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size); - } - break; - - default: - CERROR("Bad message type: 0x%02x\n", (__u32)msg->ptlm_type); - return -EPROTO; - } - - return 0; -} - -int -kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - CDEBUG(D_NET, ">>> kptllnd_ctl cmd=%u arg=%p\n", cmd, arg); - - /* - * Validate that the context block is actually - * pointing to this interface - */ - LASSERT (ni == kptllnd_data.kptl_ni); - - switch(cmd) { - case IOC_LIBCFS_DEL_PEER: { - lnet_process_id_t id; - - id.nid = data->ioc_nid; - id.pid = data->ioc_u32[1]; - - rc = kptllnd_peer_del(id); - break; - } - - case IOC_LIBCFS_GET_PEER: { - lnet_process_id_t id = {.nid = LNET_NID_ANY, - .pid = LNET_PID_ANY}; - __u64 incarnation = 0; - __u64 next_matchbits = 0; - __u64 last_matchbits_seen = 0; - int state = 0; - int sent_hello = 0; - int refcount = 0; - int nsendq = 0; - int nactiveq = 0; - int credits = 0; - int outstanding_credits = 0; - - rc = kptllnd_get_peer_info(data->ioc_count, &id, - &state, &sent_hello, - &refcount, &incarnation, - &next_matchbits, &last_matchbits_seen, - &nsendq, &nactiveq, - &credits, &outstanding_credits); - /* wince... 
*/ - data->ioc_nid = id.nid; - data->ioc_net = state; - data->ioc_flags = sent_hello; - data->ioc_count = refcount; - data->ioc_u64[0] = incarnation; - data->ioc_u32[0] = (__u32)next_matchbits; - data->ioc_u32[1] = (__u32)(next_matchbits >> 32); - data->ioc_u32[2] = (__u32)last_matchbits_seen; - data->ioc_u32[3] = (__u32)(last_matchbits_seen >> 32); - data->ioc_u32[4] = id.pid; - data->ioc_u32[5] = (nsendq << 16) | nactiveq; - data->ioc_u32[6] = (credits << 16) | outstanding_credits; - break; - } - - default: - rc=-EINVAL; - break; - } - CDEBUG(D_NET, "<<< kptllnd_ctl rc=%d\n", rc); - return rc; -} - -int -kptllnd_startup (lnet_ni_t *ni) -{ - int rc; - int i; - int spares; - struct timeval tv; - ptl_err_t ptl_rc; - - LASSERT (ni->ni_lnd == &kptllnd_lnd); - - if (kptllnd_data.kptl_init != PTLLND_INIT_NOTHING) { - CERROR("Only 1 instance supported\n"); - return -EPERM; - } - - if (*kptllnd_tunables.kptl_max_procs_per_node < 1) { - CERROR("max_procs_per_node must be > 1\n"); - return -EINVAL; - } - - *kptllnd_tunables.kptl_max_msg_size &= ~7; - if (*kptllnd_tunables.kptl_max_msg_size < PTLLND_MIN_BUFFER_SIZE) - *kptllnd_tunables.kptl_max_msg_size = PTLLND_MIN_BUFFER_SIZE; - - CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0); - CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE); - - /* - * zero pointers, flags etc - * put everything into a known state. 
- */ - memset (&kptllnd_data, 0, sizeof (kptllnd_data)); - kptllnd_data.kptl_eqh = PTL_INVALID_HANDLE; - kptllnd_data.kptl_nih = PTL_INVALID_HANDLE; - - /* - * Uptick the module reference count - */ - PORTAL_MODULE_USE; - - /* - * Setup pointers between the ni and context data block - */ - kptllnd_data.kptl_ni = ni; - ni->ni_data = &kptllnd_data; - - /* - * Setup Credits - */ - ni->ni_maxtxcredits = *kptllnd_tunables.kptl_credits; - ni->ni_peertxcredits = *kptllnd_tunables.kptl_peercredits; - - kptllnd_data.kptl_expected_peers = - *kptllnd_tunables.kptl_max_nodes * - *kptllnd_tunables.kptl_max_procs_per_node; - - /* - * Initialize the Network interface instance - * We use the default because we don't have any - * way to choose a better interface. - * Requested and actual limits are ignored. - */ - ptl_rc = PtlNIInit( -#ifdef _USING_LUSTRE_PORTALS_ - PTL_IFACE_DEFAULT, -#else - CRAY_KERN_NAL, -#endif - *kptllnd_tunables.kptl_pid, NULL, NULL, - &kptllnd_data.kptl_nih); - - /* - * Note: PTL_IFACE_DUP simply means that the requested - * interface was already inited and that we're sharing it. - * Which is ok. 
- */ - if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) { - CERROR ("PtlNIInit: error %d\n", ptl_rc); - rc = -EINVAL; - goto failed; - } - - /* NB eq size irrelevant if using a callback */ - ptl_rc = PtlEQAlloc(kptllnd_data.kptl_nih, - 8, /* size */ - kptllnd_eq_callback, /* handler callback */ - &kptllnd_data.kptl_eqh); /* output handle */ - if (ptl_rc != PTL_OK) { - CERROR("PtlEQAlloc failed %d\n", ptl_rc); - rc = -ENOMEM; - goto failed; - } - - /* - * Fetch the lower NID - */ - ptl_rc = PtlGetId(kptllnd_data.kptl_nih, - &kptllnd_data.kptl_portals_id); - if (ptl_rc != PTL_OK) { - CERROR ("PtlGetID: error %d\n", ptl_rc); - rc = -EINVAL; - goto failed; - } - - if (kptllnd_data.kptl_portals_id.pid != *kptllnd_tunables.kptl_pid) { - /* The kernel ptllnd must have the expected PID */ - CERROR("Unexpected PID: %u (%u expected)\n", - kptllnd_data.kptl_portals_id.pid, - *kptllnd_tunables.kptl_pid); - rc = -EINVAL; - goto failed; - } - - ni->ni_nid = kptllnd_ptl2lnetnid(kptllnd_data.kptl_portals_id.nid); - - CDEBUG(D_NET, "ptl id=%s, lnet id=%s\n", - kptllnd_ptlid2str(kptllnd_data.kptl_portals_id), - libcfs_nid2str(ni->ni_nid)); - - /* Initialized the incarnation - it must be for-all-time unique, even - * accounting for the fact that we increment it when we disconnect a - * peer that's using it */ - do_gettimeofday(&tv); - kptllnd_data.kptl_incarnation = (((__u64)tv.tv_sec) * 1000000) + - tv.tv_usec; - CDEBUG(D_NET, "Incarnation="LPX64"\n", kptllnd_data.kptl_incarnation); - - /* - * Setup the sched locks/lists/waitq - */ - spin_lock_init(&kptllnd_data.kptl_sched_lock); - init_waitqueue_head(&kptllnd_data.kptl_sched_waitq); - INIT_LIST_HEAD(&kptllnd_data.kptl_sched_txq); - INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxq); - INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxbq); - - /* - * Setup the tx locks/lists - */ - spin_lock_init(&kptllnd_data.kptl_tx_lock); - INIT_LIST_HEAD(&kptllnd_data.kptl_idle_txs); - atomic_set(&kptllnd_data.kptl_ntx, 0); - - /* - * Allocate and setup the 
peer hash table - */ - rwlock_init(&kptllnd_data.kptl_peer_rw_lock); - init_waitqueue_head(&kptllnd_data.kptl_watchdog_waitq); - INIT_LIST_HEAD(&kptllnd_data.kptl_closing_peers); - INIT_LIST_HEAD(&kptllnd_data.kptl_zombie_peers); - - spin_lock_init(&kptllnd_data.kptl_ptlid2str_lock); - - kptllnd_data.kptl_peer_hash_size = - *kptllnd_tunables.kptl_peer_hash_table_size; - LIBCFS_ALLOC(kptllnd_data.kptl_peers, - (kptllnd_data.kptl_peer_hash_size * - sizeof(struct list_head))); - if (kptllnd_data.kptl_peers == NULL) { - CERROR("Failed to allocate space for peer hash table size=%d\n", - kptllnd_data.kptl_peer_hash_size); - rc = -ENOMEM; - goto failed; - } - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) - INIT_LIST_HEAD(&kptllnd_data.kptl_peers[i]); - - LIBCFS_ALLOC(kptllnd_data.kptl_nak_msg, offsetof(kptl_msg_t, ptlm_u)); - if (kptllnd_data.kptl_nak_msg == NULL) { - CERROR("Can't allocate NAK msg\n"); - rc = -ENOMEM; - goto failed; - } - memset(kptllnd_data.kptl_nak_msg, 0, offsetof(kptl_msg_t, ptlm_u)); - kptllnd_init_msg(kptllnd_data.kptl_nak_msg, PTLLND_MSG_TYPE_NAK, 0); - kptllnd_data.kptl_nak_msg->ptlm_magic = PTLLND_MSG_MAGIC; - kptllnd_data.kptl_nak_msg->ptlm_version = PTLLND_MSG_VERSION; - kptllnd_data.kptl_nak_msg->ptlm_srcpid = the_lnet.ln_pid; - kptllnd_data.kptl_nak_msg->ptlm_srcnid = ni->ni_nid; - kptllnd_data.kptl_nak_msg->ptlm_srcstamp = kptllnd_data.kptl_incarnation; - kptllnd_data.kptl_nak_msg->ptlm_dstpid = LNET_PID_ANY; - kptllnd_data.kptl_nak_msg->ptlm_dstnid = LNET_NID_ANY; - - kptllnd_rx_buffer_pool_init(&kptllnd_data.kptl_rx_buffer_pool); - - kptllnd_data.kptl_rx_cache = - cfs_mem_cache_create("ptllnd_rx", - sizeof(kptl_rx_t) + - *kptllnd_tunables.kptl_max_msg_size, - 0, /* offset */ - 0); /* flags */ - if (kptllnd_data.kptl_rx_cache == NULL) { - CERROR("Can't create slab for RX descriptors\n"); - rc = -ENOMEM; - goto failed; - } - - /* lists/ptrs/locks initialised */ - kptllnd_data.kptl_init = PTLLND_INIT_DATA; - - 
/*****************************************************/ - - rc = kptllnd_setup_tx_descs(); - if (rc != 0) { - CERROR("Can't pre-allocate %d TX descriptors: %d\n", - *kptllnd_tunables.kptl_ntx, rc); - goto failed; - } - - /* Start the scheduler threads for handling incoming requests. No need - * to advance the state because this will be automatically cleaned up - * now that PTLNAT_INIT_DATA state has been entered */ - CDEBUG(D_NET, "starting %d scheduler threads\n", PTLLND_N_SCHED); - for (i = 0; i < PTLLND_N_SCHED; i++) { - rc = kptllnd_thread_start(kptllnd_scheduler, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn scheduler[%d]: %d\n", i, rc); - goto failed; - } - } - - rc = kptllnd_thread_start(kptllnd_watchdog, NULL); - if (rc != 0) { - CERROR("Can't spawn watchdog: %d\n", rc); - goto failed; - } - - /* Ensure that 'rxb_nspare' buffers can be off the net (being emptied) - * and we will still have enough buffers posted for all our peers */ - spares = *kptllnd_tunables.kptl_rxb_nspare * - ((*kptllnd_tunables.kptl_rxb_npages * PAGE_SIZE)/ - *kptllnd_tunables.kptl_max_msg_size); - - /* reserve and post the buffers */ - rc = kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool, - kptllnd_data.kptl_expected_peers + - spares); - if (rc != 0) { - CERROR("Can't reserve RX Buffer pool: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kptllnd_data.kptl_init = PTLLND_INIT_ALL; - - /*****************************************************/ - - if (*kptllnd_tunables.kptl_checksum) - CWARN("Checksumming enabled\n"); - - CDEBUG(D_NET, "<<< kptllnd_startup SUCCESS\n"); - return 0; - - failed: - CDEBUG(D_NET, "kptllnd_startup failed rc=%d\n", rc); - kptllnd_shutdown(ni); - return rc; -} - -void -kptllnd_shutdown (lnet_ni_t *ni) -{ - int i; - ptl_err_t prc; - lnet_process_id_t process_id; - unsigned long flags; - - CDEBUG(D_MALLOC, "before LND cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - LASSERT (ni == kptllnd_data.kptl_ni); 
- - switch (kptllnd_data.kptl_init) { - default: - LBUG(); - - case PTLLND_INIT_ALL: - case PTLLND_INIT_DATA: - /* Stop receiving */ - kptllnd_rx_buffer_pool_fini(&kptllnd_data.kptl_rx_buffer_pool); - LASSERT (list_empty(&kptllnd_data.kptl_sched_rxq)); - LASSERT (list_empty(&kptllnd_data.kptl_sched_rxbq)); - - /* Hold peertable lock to interleave cleanly with peer birth/death */ - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - LASSERT (kptllnd_data.kptl_shutdown == 0); - kptllnd_data.kptl_shutdown = 1; /* phase 1 == destroy peers */ - - /* no new peers possible now */ - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - /* nuke all existing peers */ - process_id.nid = LNET_NID_ANY; - process_id.pid = LNET_PID_ANY; - kptllnd_peer_del(process_id); - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - LASSERT (kptllnd_data.kptl_n_active_peers == 0); - - i = 2; - while (kptllnd_data.kptl_npeers != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "Waiting for %d peers to terminate\n", - kptllnd_data.kptl_npeers); - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - cfs_pause(cfs_time_seconds(1)); - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, - flags); - } - - LASSERT(list_empty(&kptllnd_data.kptl_closing_peers)); - LASSERT(list_empty(&kptllnd_data.kptl_zombie_peers)); - LASSERT (kptllnd_data.kptl_peers != NULL); - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) - LASSERT (list_empty (&kptllnd_data.kptl_peers[i])); - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - CDEBUG(D_NET, "All peers deleted\n"); - - /* Shutdown phase 2: kill the daemons... */ - kptllnd_data.kptl_shutdown = 2; - mb(); - - i = 2; - while (atomic_read (&kptllnd_data.kptl_nthreads) != 0) { - /* Wake up all threads*/ - wake_up_all(&kptllnd_data.kptl_sched_waitq); - wake_up_all(&kptllnd_data.kptl_watchdog_waitq); - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "Waiting for %d threads to terminate\n", - atomic_read(&kptllnd_data.kptl_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - CDEBUG(D_NET, "All Threads stopped\n"); - LASSERT(list_empty(&kptllnd_data.kptl_sched_txq)); - - kptllnd_cleanup_tx_descs(); - - /* Nothing here now, but libcfs might soon require - * us to explicitly destroy wait queues and semaphores - * that would be done here */ - - /* fall through */ - - case PTLLND_INIT_NOTHING: - CDEBUG(D_NET, "PTLLND_INIT_NOTHING\n"); - break; - } - - if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) { - prc = PtlEQFree(kptllnd_data.kptl_eqh); - if (prc != PTL_OK) - CERROR("Error %d freeing portals EQ\n", prc); - } - - if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) { - prc = PtlNIFini(kptllnd_data.kptl_nih); - if (prc != PTL_OK) - CERROR("Error %d finalizing portals NI\n", prc); - } - - LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0); - LASSERT (list_empty(&kptllnd_data.kptl_idle_txs)); - - if (kptllnd_data.kptl_rx_cache != NULL) - cfs_mem_cache_destroy(kptllnd_data.kptl_rx_cache); - - if (kptllnd_data.kptl_peers != NULL) - LIBCFS_FREE (kptllnd_data.kptl_peers, - sizeof (struct list_head) * - kptllnd_data.kptl_peer_hash_size); - - if (kptllnd_data.kptl_nak_msg != NULL) - LIBCFS_FREE (kptllnd_data.kptl_nak_msg, - offsetof(kptl_msg_t, ptlm_u)); - - memset(&kptllnd_data, 0, sizeof(kptllnd_data)); - - CDEBUG(D_MALLOC, "after LND cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - PORTAL_MODULE_UNUSE; -} - -int __init -kptllnd_module_init (void) -{ - int rc; - - kptllnd_assert_wire_constants(); - - rc = kptllnd_tunables_init(); - if (rc != 0) - return rc; - - kptllnd_init_ptltrace(); - - lnet_register_lnd(&kptllnd_lnd); - - return 0; -} - -void __exit -kptllnd_module_fini (void) -{ - lnet_unregister_lnd(&kptllnd_lnd); - kptllnd_tunables_fini(); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel Portals LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kptllnd_module_init); -module_exit(kptllnd_module_fini); diff --git a/lnet/klnds/ptllnd/ptllnd.h b/lnet/klnds/ptllnd/ptllnd.h deleted file mode 100755 index 3df2c3a29d8f613dbad9f5900c7d5a51874d35e0..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd.h +++ /dev/null @@ -1,549 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <linux/random.h> - -#include <net/sock.h> -#include <linux/in.h> - - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> -#include <portals/p30.h> -#ifdef CRAY_XT3 -#include <portals/ptltrace.h> -#endif -#include <lnet/ptllnd.h> /* Depends on portals/p30.h */ - -/* - * Define this to enable console debug logging - * and simulation - */ -//#define PJK_DEBUGGING - -#ifdef CONFIG_SMP -# define PTLLND_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define PTLLND_N_SCHED 1 /* # schedulers */ -#endif - -#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peercredits)-1) - /* when eagerly to return credits */ - -typedef struct -{ - int *kptl_ntx; /* # tx descs to pre-allocate */ - int *kptl_max_nodes; /* max # nodes all talking to me */ - int *kptl_max_procs_per_node; /* max # processes per node */ - int *kptl_checksum; /* checksum kptl_msg_t? 
*/ - int *kptl_timeout; /* comms timeout (seconds) */ - int *kptl_portal; /* portal number */ - int *kptl_pid; /* portals PID (self + kernel peers) */ - int *kptl_rxb_npages; /* number of pages for rx buffer */ - int *kptl_rxb_nspare; /* number of spare rx buffers */ - int *kptl_credits; /* number of credits */ - int *kptl_peercredits; /* number of credits */ - int *kptl_max_msg_size; /* max immd message size*/ - int *kptl_peer_hash_table_size; /* # slots in peer hash table */ - int *kptl_reschedule_loops; /* scheduler yield loops */ - int *kptl_ack_puts; /* make portals ack PUTs */ -#ifdef CRAY_XT3 - int *kptl_ptltrace_on_timeout; /* dump pltrace on timeout? */ - char **kptl_ptltrace_basename; /* ptltrace dump file basename */ -#endif -#ifdef PJK_DEBUGGING - int *kptl_simulation_bitmap;/* simulation bitmap */ -#endif - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *kptl_sysctl; /* sysctl interface */ -#endif -} kptl_tunables_t; - -#include "lnet/ptllnd_wire.h" - -/***********************************************************************/ - -typedef struct kptl_data kptl_data_t; -typedef struct kptl_rx_buffer kptl_rx_buffer_t; -typedef struct kptl_peer kptl_peer_t; - -typedef struct { - char eva_type; -} kptl_eventarg_t; - -#define PTLLND_EVENTARG_TYPE_MSG 0x1 -#define PTLLND_EVENTARG_TYPE_RDMA 0x2 -#define PTLLND_EVENTARG_TYPE_BUF 0x3 - -typedef struct kptl_rx /* receive message */ -{ - struct list_head rx_list; /* queue for attention */ - kptl_rx_buffer_t *rx_rxb; /* the rx buffer pointer */ - kptl_msg_t *rx_msg; /* received message */ - int rx_nob; /* received message size */ - unsigned long rx_treceived; /* time received */ - ptl_process_id_t rx_initiator; /* sender's address */ -#ifdef CRAY_XT3 - ptl_uid_t rx_uid; /* sender's uid */ -#endif - kptl_peer_t *rx_peer; /* pointer to peer */ - char rx_space[0]; /* copy of incoming request */ -} kptl_rx_t; - -typedef struct kptl_rx_buffer_pool -{ - spinlock_t rxbp_lock; - struct list_head 
rxbp_list; /* all allocated buffers */ - int rxbp_count; /* # allocated buffers */ - int rxbp_reserved; /* # requests to buffer */ - int rxbp_shutdown; /* shutdown flag */ -} kptl_rx_buffer_pool_t; - -struct kptl_rx_buffer -{ - kptl_rx_buffer_pool_t *rxb_pool; - struct list_head rxb_list; /* for the rxb_pool list */ - struct list_head rxb_repost_list;/* for the kptl_sched_rxbq list */ - int rxb_posted:1; /* on the net */ - int rxb_idle:1; /* all done */ - kptl_eventarg_t rxb_eventarg; /* event->md.user_ptr */ - int rxb_refcount; /* reference count */ - ptl_handle_md_t rxb_mdh; /* the portals memory descriptor (MD) handle */ - char *rxb_buffer; /* the buffer */ - -}; - -enum kptl_tx_type -{ - TX_TYPE_RESERVED = 0, - TX_TYPE_SMALL_MESSAGE = 1, - TX_TYPE_PUT_REQUEST = 2, - TX_TYPE_GET_REQUEST = 3, - TX_TYPE_PUT_RESPONSE = 4, - TX_TYPE_GET_RESPONSE = 5, -}; - -typedef union { -#ifdef _USING_LUSTRE_PORTALS_ - struct iovec iov[PTL_MD_MAX_IOV]; - lnet_kiov_t kiov[PTL_MD_MAX_IOV]; -#else - ptl_md_iovec_t iov[PTL_MD_MAX_IOV]; -#endif -} kptl_fragvec_t; - -typedef struct kptl_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs etc */ - atomic_t tx_refcount; /* reference count*/ - enum kptl_tx_type tx_type; /* small msg/{put,get}{req,resp} */ - int tx_active:1; /* queued on the peer */ - int tx_idle:1; /* on the free list */ - int tx_acked:1; /* portals ACK wanted (for debug only) */ - kptl_eventarg_t tx_msg_eventarg; /* event->md.user_ptr */ - kptl_eventarg_t tx_rdma_eventarg; /* event->md.user_ptr */ - int tx_status; /* the status of this tx descriptor */ - ptl_handle_md_t tx_rdma_mdh; /* RDMA buffer */ - ptl_handle_md_t tx_msg_mdh; /* the portals MD handle for the initial message */ - lnet_msg_t *tx_lnet_msg; /* LNET message to finalize */ - lnet_msg_t *tx_lnet_replymsg; /* LNET reply message to finalize */ - kptl_msg_t *tx_msg; /* the message data */ - kptl_peer_t *tx_peer; /* the peer this is waiting on */ - unsigned long tx_deadline; /* 
deadline */ - unsigned long tx_tposted; /* time posted */ - ptl_md_t tx_rdma_md; /* rdma descriptor */ - kptl_fragvec_t *tx_frags; /* buffer fragments */ -} kptl_tx_t; - -enum kptllnd_peer_state -{ - PEER_STATE_UNINITIALIZED = 0, - PEER_STATE_ALLOCATED = 1, - PEER_STATE_WAITING_HELLO = 2, - PEER_STATE_ACTIVE = 3, - PEER_STATE_CLOSING = 4, - PEER_STATE_ZOMBIE = 5, -}; - -struct kptl_peer -{ - struct list_head peer_list; - atomic_t peer_refcount; /* The current refrences */ - enum kptllnd_peer_state peer_state; - spinlock_t peer_lock; /* serialize */ - struct list_head peer_sendq; /* txs waiting for mh handles */ - struct list_head peer_activeq; /* txs awaiting completion */ - lnet_process_id_t peer_id; /* Peer's LNET id */ - ptl_process_id_t peer_ptlid; /* Peer's portals id */ - __u64 peer_incarnation; /* peer's incarnation */ - __u64 peer_myincarnation; /* my incarnation at HELLO */ - int peer_sent_hello; /* have I sent HELLO? */ - int peer_credits; /* number of send credits */ - int peer_outstanding_credits;/* number of peer credits to return */ - int peer_sent_credits; /* #msg buffers posted for peer */ - int peer_max_msg_size; /* peer's rx buffer size */ - int peer_error; /* errno on closing this peer */ - int peer_retry_noop; /* need to retry returning credits */ - int peer_check_stamp; /* watchdog check stamp */ - cfs_time_t peer_last_alive; /* when (in jiffies) I was last alive */ - __u64 peer_next_matchbits; /* Next value to register RDMA from peer */ - __u64 peer_last_matchbits_seen; /* last matchbits used to RDMA to peer */ -}; - -struct kptl_data -{ - int kptl_init; /* initialisation state */ - volatile int kptl_shutdown; /* shut down? 
*/ - atomic_t kptl_nthreads; /* # live threads */ - lnet_ni_t *kptl_ni; /* _the_ LND instance */ - ptl_handle_ni_t kptl_nih; /* network inteface handle */ - ptl_process_id_t kptl_portals_id; /* Portals ID of interface */ - __u64 kptl_incarnation; /* which one am I */ - ptl_handle_eq_t kptl_eqh; /* Event Queue (EQ) */ - - spinlock_t kptl_sched_lock; /* serialise... */ - wait_queue_head_t kptl_sched_waitq; /* schedulers sleep here */ - struct list_head kptl_sched_txq; /* tx requiring attention */ - struct list_head kptl_sched_rxq; /* rx requiring attention */ - struct list_head kptl_sched_rxbq; /* rxb requiring reposting */ - - wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */ - - kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */ - cfs_mem_cache_t* kptl_rx_cache; /* rx descripter cache */ - - atomic_t kptl_ntx; /* # tx descs allocated */ - spinlock_t kptl_tx_lock; /* serialise idle tx list*/ - struct list_head kptl_idle_txs; /* idle tx descriptors */ - - rwlock_t kptl_peer_rw_lock; /* lock for peer table */ - struct list_head *kptl_peers; /* hash table of all my known peers */ - struct list_head kptl_closing_peers; /* peers being closed */ - struct list_head kptl_zombie_peers; /* peers waiting for refs to drain */ - int kptl_peer_hash_size; /* size of kptl_peers */ - int kptl_npeers; /* # peers extant */ - int kptl_n_active_peers; /* # active peers */ - int kptl_expected_peers; /* # peers I can buffer HELLOs from */ - - kptl_msg_t *kptl_nak_msg; /* common NAK message */ - spinlock_t kptl_ptlid2str_lock; /* serialise str ops */ -}; - -enum -{ - PTLLND_INIT_NOTHING = 0, - PTLLND_INIT_DATA, - PTLLND_INIT_ALL, -}; - -extern kptl_tunables_t kptllnd_tunables; -extern kptl_data_t kptllnd_data; - -static inline lnet_nid_t -kptllnd_ptl2lnetnid(ptl_nid_t ptl_nid) -{ -#ifdef _USING_LUSTRE_PORTALS_ - return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid), - LNET_NIDADDR(ptl_nid)); -#else - return 
LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_ni->ni_nid), - ptl_nid); -#endif -} - -static inline ptl_nid_t -kptllnd_lnet2ptlnid(lnet_nid_t lnet_nid) -{ -#ifdef _USING_LUSTRE_PORTALS_ - return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_portals_id.nid), - LNET_NIDADDR(lnet_nid)); -#else - return LNET_NIDADDR(lnet_nid); -#endif -} - -int kptllnd_startup(lnet_ni_t *ni); -void kptllnd_shutdown(lnet_ni_t *ni); -int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int kptllnd_eager_recv(struct lnet_ni *ni, void *private, - lnet_msg_t *msg, void **new_privatep); -void kptllnd_eq_callback(ptl_event_t *evp); -int kptllnd_scheduler(void *arg); -int kptllnd_watchdog(void *arg); -int kptllnd_thread_start(int (*fn)(void *arg), void *arg); -int kptllnd_tunables_init(void); -void kptllnd_tunables_fini(void); - -const char *kptllnd_evtype2str(int evtype); -const char *kptllnd_msgtype2str(int msgtype); -const char *kptllnd_errtype2str(int errtype); - -static inline void * -kptllnd_eventarg2obj (kptl_eventarg_t *eva) -{ - switch (eva->eva_type) { - default: - LBUG(); - case PTLLND_EVENTARG_TYPE_BUF: - return list_entry(eva, kptl_rx_buffer_t, rxb_eventarg); - case PTLLND_EVENTARG_TYPE_RDMA: - return list_entry(eva, kptl_tx_t, tx_rdma_eventarg); - case PTLLND_EVENTARG_TYPE_MSG: - return list_entry(eva, kptl_tx_t, tx_msg_eventarg); - } -} - -/* - * RX BUFFER SUPPORT FUNCTIONS - */ -void kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp); -void kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp); -int kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count); -void kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp, int count); -void kptllnd_rx_buffer_callback(ptl_event_t *ev); 
-void kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb); - -static inline int -kptllnd_rx_buffer_size(void) -{ - return PAGE_SIZE * (*kptllnd_tunables.kptl_rxb_npages); -} - -static inline void -kptllnd_rx_buffer_addref(kptl_rx_buffer_t *rxb) -{ - unsigned long flags; - - spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags); - rxb->rxb_refcount++; - spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags); -} - -static inline void -kptllnd_rx_buffer_decref_locked(kptl_rx_buffer_t *rxb) -{ - if (--(rxb->rxb_refcount) == 0) { - spin_lock(&kptllnd_data.kptl_sched_lock); - - list_add_tail(&rxb->rxb_repost_list, - &kptllnd_data.kptl_sched_rxbq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock(&kptllnd_data.kptl_sched_lock); - } -} - -static inline void -kptllnd_rx_buffer_decref(kptl_rx_buffer_t *rxb) -{ - unsigned long flags; - int count; - - spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags); - count = --(rxb->rxb_refcount); - spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags); - - if (count == 0) - kptllnd_rx_buffer_post(rxb); -} - -/* - * RX SUPPORT FUNCTIONS - */ -void kptllnd_rx_done(kptl_rx_t *rx); -void kptllnd_rx_parse(kptl_rx_t *rx); - -/* - * PEER SUPPORT FUNCTIONS - */ -int kptllnd_get_peer_info(int index, - lnet_process_id_t *id, - int *state, int *sent_hello, - int *refcount, __u64 *incarnation, - __u64 *next_matchbits, __u64 *last_matchbits_seen, - int *nsendq, int *nactiveq, - int *credits, int *outstanding_credits); -void kptllnd_peer_destroy(kptl_peer_t *peer); -int kptllnd_peer_del(lnet_process_id_t id); -void kptllnd_peer_close_locked(kptl_peer_t *peer, int why); -void kptllnd_peer_close(kptl_peer_t *peer, int why); -void kptllnd_handle_closing_peers(void); -int kptllnd_peer_connect(kptl_tx_t *tx, lnet_nid_t nid); -void kptllnd_peer_check_sends(kptl_peer_t *peer); -void kptllnd_peer_check_bucket(int idx, int stamp); -void kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag); -int kptllnd_find_target(kptl_peer_t **peerp, 
lnet_process_id_t target); -kptl_peer_t *kptllnd_peer_handle_hello(ptl_process_id_t initiator, - kptl_msg_t *msg); -kptl_peer_t *kptllnd_id2peer_locked(lnet_process_id_t id); -void kptllnd_peer_alive(kptl_peer_t *peer); - -static inline void -kptllnd_peer_addref (kptl_peer_t *peer) -{ - atomic_inc(&peer->peer_refcount); -} - -static inline void -kptllnd_peer_decref (kptl_peer_t *peer) -{ - if (atomic_dec_and_test(&peer->peer_refcount)) - kptllnd_peer_destroy(peer); -} - -static inline void -kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer) -{ - LASSERT (tx->tx_peer == NULL); - - kptllnd_peer_addref(peer); - tx->tx_peer = peer; -} - -static inline struct list_head * -kptllnd_nid2peerlist(lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % - kptllnd_data.kptl_peer_hash_size; - - return &kptllnd_data.kptl_peers[hash]; -} - -static inline kptl_peer_t * -kptllnd_id2peer(lnet_process_id_t id) -{ - kptl_peer_t *peer; - unsigned long flags; - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - peer = kptllnd_id2peer_locked(id); - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - return peer; -} - -static inline int -kptllnd_reserve_buffers(int n) -{ - return kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool, - n); -} - -static inline int -kptllnd_peer_reserve_buffers(void) -{ - return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peercredits); -} - -static inline void -kptllnd_peer_unreserve_buffers(void) -{ - kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool, - *kptllnd_tunables.kptl_peercredits); -} - -/* - * TX SUPPORT FUNCTIONS - */ -int kptllnd_setup_tx_descs(void); -void kptllnd_cleanup_tx_descs(void); -void kptllnd_tx_fini(kptl_tx_t *tx); -kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose); -void kptllnd_tx_callback(ptl_event_t *ev); -const char *kptllnd_tx_typestr(int type); - -static inline void -kptllnd_tx_addref(kptl_tx_t *tx) -{ - atomic_inc(&tx->tx_refcount); -} - -static 
inline void -kptllnd_tx_decref(kptl_tx_t *tx) -{ - LASSERT (!in_interrupt()); /* Thread context only */ - - if (atomic_dec_and_test(&tx->tx_refcount)) - kptllnd_tx_fini(tx); -} - -/* - * MESSAGE SUPPORT FUNCTIONS - */ -void kptllnd_init_msg(kptl_msg_t *msg, int type, int body_nob); -void kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer); -int kptllnd_msg_unpack(kptl_msg_t *msg, int nob); - -/* - * MISC SUPPORT FUNCTIONS - */ -void kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int nob); -char *kptllnd_ptlid2str(ptl_process_id_t id); - -void kptllnd_init_ptltrace(void); -void kptllnd_dump_ptltrace(void); - -#ifdef PJK_DEBUGGING -#define SIMULATION_FAIL_TX_PUT_ALLOC 0 /* 0x00000001 */ -#define SIMULATION_FAIL_TX_GET_ALLOC 1 /* 0x00000002 */ -#define SIMULATION_FAIL_TX 2 /* 0x00000004 */ -#define SIMULATION_FAIL_RX_ALLOC 3 /* 0x00000008 */ - -#define IS_SIMULATION_ENABLED(x) \ - (((*kptllnd_tunables.kptl_simulation_bitmap) & 1<< SIMULATION_##x) != 0) -#else -#define IS_SIMULATION_ENABLED(x) 0 -#endif - diff --git a/lnet/klnds/ptllnd/ptllnd_cb.c b/lnet/klnds/ptllnd/ptllnd_cb.c deleted file mode 100644 index 75344e17ae794ecdfcd611bb62beb477c910ea0b..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_cb.c +++ /dev/null @@ -1,811 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. 
- * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -#include "ptllnd.h" - -#ifndef _USING_LUSTRE_PORTALS_ -int -kptllnd_extract_iov (int dst_niov, ptl_md_iovec_t *dst, - int src_niov, struct iovec *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return (niov); - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} - -int -kptllnd_extract_phys (int dst_niov, ptl_md_iovec_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the physical addresses of the subset of 'src' - * starting at 'offset', for exactly 'len' bytes, and return the number - * of entries. 
NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - __u64 phys_page; - __u64 phys; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->kiov_len) { /* skip initial frags */ - offset -= src->kiov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = min(src->kiov_len - offset, len); - phys_page = lnet_page2phys(src->kiov_page); - phys = phys_page + src->kiov_offset + offset; - - LASSERT (sizeof(void *) > 4 || - (phys <= 0xffffffffULL && - phys + (frag_len - 1) <= 0xffffffffULL)); - - dst->iov_base = (void *)((unsigned long)phys); - dst->iov_len = frag_len; - - if (frag_len == len) - return niov; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -#endif - -void -kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int nob) -{ - LASSERT (iov == NULL || kiov == NULL); - - memset(&tx->tx_rdma_md, 0, sizeof(tx->tx_rdma_md)); - - tx->tx_rdma_md.start = tx->tx_frags; - tx->tx_rdma_md.user_ptr = &tx->tx_rdma_eventarg; - tx->tx_rdma_md.eq_handle = kptllnd_data.kptl_eqh; - tx->tx_rdma_md.options = PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE; - switch (tx->tx_type) { - default: - LBUG(); - - case TX_TYPE_PUT_REQUEST: /* passive: peer gets */ - tx->tx_rdma_md.threshold = 1; /* GET event */ - tx->tx_rdma_md.options |= PTL_MD_OP_GET; - break; - - case TX_TYPE_GET_REQUEST: /* passive: peer puts */ - tx->tx_rdma_md.threshold = 1; /* PUT event */ - tx->tx_rdma_md.options |= PTL_MD_OP_PUT; - break; - - case TX_TYPE_PUT_RESPONSE: /* active: I get */ - tx->tx_rdma_md.threshold = 2; /* SEND + REPLY */ - break; - - case TX_TYPE_GET_RESPONSE: /* active: I put */ - tx->tx_rdma_md.threshold = tx->tx_acked ? 2 : 1; /* SEND + ACK? 
*/ - break; - } - - if (nob == 0) { - tx->tx_rdma_md.length = 0; - return; - } - -#ifdef _USING_LUSTRE_PORTALS_ - if (iov != NULL) { - tx->tx_rdma_md.options |= PTL_MD_IOVEC; - tx->tx_rdma_md.length = - lnet_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, iov, offset, nob); - return; - } - - /* Cheating OK since ptl_kiov_t == lnet_kiov_t */ - CLASSERT(sizeof(ptl_kiov_t) == sizeof(lnet_kiov_t)); - CLASSERT(offsetof(ptl_kiov_t, kiov_offset) == - offsetof(lnet_kiov_t, kiov_offset)); - CLASSERT(offsetof(ptl_kiov_t, kiov_page) == - offsetof(lnet_kiov_t, kiov_page)); - CLASSERT(offsetof(ptl_kiov_t, kiov_len) == - offsetof(lnet_kiov_t, kiov_len)); - - tx->tx_rdma_md.options |= PTL_MD_KIOV; - tx->tx_rdma_md.length = - lnet_extract_kiov(PTL_MD_MAX_IOV, tx->tx_frags->kiov, - niov, kiov, offset, nob); -#else - if (iov != NULL) { - tx->tx_rdma_md.options |= PTL_MD_IOVEC; - tx->tx_rdma_md.length = - kptllnd_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, iov, offset, nob); - return; - } - - tx->tx_rdma_md.options |= PTL_MD_IOVEC | PTL_MD_PHYS; - tx->tx_rdma_md.length = - kptllnd_extract_phys(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, kiov, offset, nob); -#endif -} - -int -kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, int nob) -{ - kptl_tx_t *tx; - ptl_err_t ptlrc; - kptl_msg_t *rxmsg = rx->rx_msg; - kptl_peer_t *peer = rx->rx_peer; - unsigned long flags; - ptl_handle_md_t mdh; - - LASSERT (type == TX_TYPE_PUT_RESPONSE || - type == TX_TYPE_GET_RESPONSE); - - tx = kptllnd_get_idle_tx(type); - if (tx == NULL) { - CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n", - type == TX_TYPE_PUT_RESPONSE ? 
"GET" : "PUT", - libcfs_id2str(peer->peer_id)); - return -ENOMEM; - } - - kptllnd_set_tx_peer(tx, peer); - kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob); - - ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md, - PTL_UNLINK, &mdh); - if (ptlrc != PTL_OK) { - CERROR("PtlMDBind(%s) failed: %d\n", - libcfs_id2str(peer->peer_id), ptlrc); - tx->tx_status = -EIO; - kptllnd_tx_decref(tx); - return -EIO; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - - tx->tx_lnet_msg = lntmsg; - /* lnet_finalize() will be called when tx is torn down, so I must - * return success from here on... */ - - tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ); - tx->tx_rdma_mdh = mdh; - tx->tx_active = 1; - list_add_tail(&tx->tx_list, &peer->peer_activeq); - - /* peer has now got my ref on 'tx' */ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - tx->tx_tposted = jiffies; - - if (type == TX_TYPE_GET_RESPONSE) - ptlrc = PtlPut(mdh, - tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ, - rx->rx_initiator, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - rxmsg->ptlm_u.rdma.kptlrm_matchbits, - 0, /* offset */ - (lntmsg != NULL) ? /* header data */ - PTLLND_RDMA_OK : - PTLLND_RDMA_FAIL); - else - ptlrc = PtlGet(mdh, - rx->rx_initiator, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - rxmsg->ptlm_u.rdma.kptlrm_matchbits, - 0); /* offset */ - - if (ptlrc != PTL_OK) { - CERROR("Ptl%s failed: %d\n", - (type == TX_TYPE_GET_RESPONSE) ? 
"Put" : "Get", ptlrc); - - kptllnd_peer_close(peer, -EIO); - /* Everything (including this RDMA) queued on the peer will - * be completed with failure */ - } - - return 0; -} - -int -kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kptl_peer_t *peer; - kptl_tx_t *tx; - int nob; - int nfrag; - int rc; - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); /* !!! */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - LASSERT (!in_interrupt()); - - rc = kptllnd_find_target(&peer, target); - if (rc != 0) - return rc; - - switch (type) { - default: - LBUG(); - return -EINVAL; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Should the payload avoid RDMA? 
*/ - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[payload_nob]); - if (payload_kiov == NULL && - nob <= peer->peer_max_msg_size) - break; - - tx = kptllnd_get_idle_tx(TX_TYPE_PUT_REQUEST); - if (tx == NULL) { - CERROR("Can't send %s to %s: can't allocate descriptor\n", - lnet_msgtyp2str(type), - libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - kptllnd_init_rdma_md(tx, payload_niov, - payload_iov, payload_kiov, - payload_offset, payload_nob); - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr; - kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_PUT, - sizeof(kptl_rdma_msg_t)); - - CDEBUG(D_NETTRACE, "%s: passive PUT p %d %p\n", - libcfs_id2str(target), - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx); - - kptllnd_tx_launch(peer, tx, 0); - goto out; - - case LNET_MSG_GET: - /* routed gets don't RDMA */ - if (target_is_router || routing) - break; - - /* Is the payload small enough not to need RDMA? */ - nob = lntmsg->msg_md->md_length; - nob = offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload[nob]); - if (nob <= peer->peer_max_msg_size) - break; - - tx = kptllnd_get_idle_tx(TX_TYPE_GET_REQUEST); - if (tx == NULL) { - CERROR("Can't send GET to %s: can't allocate descriptor\n", - libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - tx->tx_lnet_replymsg = - lnet_create_reply_msg(kptllnd_data.kptl_ni, lntmsg); - if (tx->tx_lnet_replymsg == NULL) { - CERROR("Failed to allocate LNET reply for %s\n", - libcfs_id2str(target)); - kptllnd_tx_decref(tx); - rc = -ENOMEM; - goto out; - } - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, NULL, - 0, lntmsg->msg_md->md_length); - else - kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov, - NULL, lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr; - kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_GET, - 
sizeof(kptl_rdma_msg_t)); - - CDEBUG(D_NETTRACE, "%s: passive GET p %d %p\n", - libcfs_id2str(target), - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx); - - kptllnd_tx_launch(peer, tx, 0); - goto out; - - case LNET_MSG_ACK: - CDEBUG(D_NET, "LNET_MSG_ACK\n"); - LASSERT (payload_nob == 0); - break; - } - - /* I don't have to handle kiovs */ - LASSERT (payload_nob == 0 || payload_iov != NULL); - - tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (tx == NULL) { - CERROR("Can't send %s to %s: can't allocate descriptor\n", - lnet_msgtyp2str(type), libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.immediate.kptlim_hdr = *hdr; - - if (payload_nob == 0) { - nfrag = 0; - } else { - tx->tx_frags->iov[0].iov_base = tx->tx_msg; - tx->tx_frags->iov[0].iov_len = offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload); - - /* NB relying on lustre not asking for PTL_MD_MAX_IOV - * fragments!! */ -#ifdef _USING_LUSTRE_PORTALS_ - nfrag = 1 + lnet_extract_iov(PTL_MD_MAX_IOV - 1, - &tx->tx_frags->iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); -#else - nfrag = 1 + kptllnd_extract_iov(PTL_MD_MAX_IOV - 1, - &tx->tx_frags->iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); -#endif - } - - nob = offsetof(kptl_immediate_msg_t, kptlim_payload[payload_nob]); - kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_IMMEDIATE, nob); - - CDEBUG(D_NETTRACE, "%s: immediate %s p %d %p\n", - libcfs_id2str(target), - lnet_msgtyp2str(lntmsg->msg_type), - (le32_to_cpu(lntmsg->msg_type) == LNET_MSG_PUT) ? - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index) : - (le32_to_cpu(lntmsg->msg_type) == LNET_MSG_GET) ? 
- le32_to_cpu(lntmsg->msg_hdr.msg.get.ptl_index) : -1, - tx); - - kptllnd_tx_launch(peer, tx, nfrag); - - out: - kptllnd_peer_decref(peer); - return rc; -} - -int -kptllnd_eager_recv(struct lnet_ni *ni, void *private, - lnet_msg_t *msg, void **new_privatep) -{ - kptl_rx_t *rx = private; - - CDEBUG(D_NET, "Eager RX=%p RXB=%p\n", rx, rx->rx_rxb); - - /* I have to release my ref on rxb (if I have one) to ensure I'm an - * eager receiver, so I copy the incoming request from the buffer it - * landed in, into space reserved in the descriptor... */ - -#if (PTL_MD_LOCAL_ALIGN8 == 0) - if (rx->rx_rxb == NULL) /* already copied */ - return 0; /* to fix alignment */ -#else - LASSERT(rx->rx_rxb != NULL); -#endif - LASSERT(rx->rx_nob <= *kptllnd_tunables.kptl_max_msg_size); - - memcpy(rx->rx_space, rx->rx_msg, rx->rx_nob); - rx->rx_msg = (kptl_msg_t *)rx->rx_space; - - kptllnd_rx_buffer_decref(rx->rx_rxb); - rx->rx_rxb = NULL; - - return 0; -} - - -int -kptllnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kptl_rx_t *rx = private; - kptl_msg_t *rxmsg = rx->rx_msg; - int nob; - int rc; - - CDEBUG(D_NET, "%s niov=%d offset=%d mlen=%d rlen=%d\n", - kptllnd_msgtype2str(rxmsg->ptlm_type), - niov, offset, mlen, rlen); - - LASSERT (mlen <= rlen); - LASSERT (mlen >= 0); - LASSERT (!in_interrupt()); - LASSERT (!(kiov != NULL && iov != NULL)); /* never both */ - LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! */ - -#ifdef CRAY_XT3 - if (lntmsg != NULL && - rx->rx_uid != 0) { - /* Set the UID if the sender's uid isn't 0; i.e. non-root - * running in userspace (e.g. a catamount node; linux kernel - * senders, including routers have uid 0). If this is a lustre - * RPC request, this tells lustre not to trust the creds in the - * RPC message body. 
*/ - lnet_set_msg_uid(ni, lntmsg, rx->rx_uid); - } -#endif - switch(rxmsg->ptlm_type) - { - default: - LBUG(); - rc = -EINVAL; - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE %d,%d\n", mlen, rlen); - - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_id2str(rx->rx_peer->peer_id), nob, - rx->rx_nob); - rc = -EINVAL; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov( - niov, kiov, offset, - *kptllnd_tunables.kptl_max_msg_size, - rxmsg->ptlm_u.immediate.kptlim_payload, - 0, - mlen); - else - lnet_copy_flat2iov( - niov, iov, offset, - *kptllnd_tunables.kptl_max_msg_size, - rxmsg->ptlm_u.immediate.kptlim_payload, - 0, - mlen); - - lnet_finalize (ni, lntmsg, 0); - rc = 0; - break; - - case PTLLND_MSG_TYPE_GET: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_GET %d,%d\n", mlen, rlen); - - /* NB always send RDMA so the peer can complete. I send - * success/failure in the portals 'hdr_data' */ - - if (lntmsg == NULL) - rc = kptllnd_active_rdma(rx, NULL, - TX_TYPE_GET_RESPONSE, - 0, NULL, NULL, 0, 0); - else - rc = kptllnd_active_rdma(rx, lntmsg, - TX_TYPE_GET_RESPONSE, - lntmsg->msg_niov, - lntmsg->msg_iov, - lntmsg->msg_kiov, - lntmsg->msg_offset, - lntmsg->msg_len); - break; - - case PTLLND_MSG_TYPE_PUT: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_PUT %d,%d\n", mlen, rlen); - - /* NB always send RDMA so the peer can complete; it'll be 0 - * bytes if there was no match (lntmsg == NULL). I have no way - * to let my peer know this, but she's only interested in when - * the net has stopped accessing her buffer in any case. 
*/ - - rc = kptllnd_active_rdma(rx, lntmsg, TX_TYPE_PUT_RESPONSE, - niov, iov, kiov, offset, mlen); - break; - } - - /* - * We're done with the RX - */ - kptllnd_rx_done(rx); - return rc; -} - -void -kptllnd_eq_callback(ptl_event_t *ev) -{ - kptl_eventarg_t *eva = ev->md.user_ptr; - - switch (eva->eva_type) { - default: - LBUG(); - - case PTLLND_EVENTARG_TYPE_MSG: - case PTLLND_EVENTARG_TYPE_RDMA: - kptllnd_tx_callback(ev); - break; - - case PTLLND_EVENTARG_TYPE_BUF: - kptllnd_rx_buffer_callback(ev); - break; - } -} - -void -kptllnd_thread_fini (void) -{ - atomic_dec(&kptllnd_data.kptl_nthreads); -} - -int -kptllnd_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid; - - atomic_inc(&kptllnd_data.kptl_nthreads); - - pid = kernel_thread (fn, arg, 0); - if (pid >= 0) - return 0; - - CERROR("Failed to start kernel_thread: error %d\n", (int)pid); - kptllnd_thread_fini(); - return (int)pid; -} - -int -kptllnd_watchdog(void *arg) -{ - int id = (long)arg; - char name[16]; - wait_queue_t waitlink; - int stamp = 0; - int peer_index = 0; - unsigned long deadline = jiffies; - int timeout; - int i; - - snprintf(name, sizeof(name), "kptllnd_wd_%02d", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&waitlink, current); - - /* threads shut down in phase 2 after all peers have been destroyed */ - while (kptllnd_data.kptl_shutdown < 2) { - - timeout = (int)(deadline - jiffies); - - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kptllnd_data.kptl_peer_hash_size; - - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. 
*/ - - if ((*kptllnd_tunables.kptl_timeout) > n * p) - chunk = (chunk * n * p) / - (*kptllnd_tunables.kptl_timeout); - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kptllnd_peer_check_bucket(peer_index, stamp); - peer_index = (peer_index + 1) % - kptllnd_data.kptl_peer_hash_size; - } - - deadline += p * HZ; - stamp++; - continue; - } - - kptllnd_handle_closing_peers(); - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kptllnd_data.kptl_watchdog_waitq, - &waitlink); - - schedule_timeout(timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue(&kptllnd_data.kptl_watchdog_waitq, &waitlink); - } - - kptllnd_thread_fini(); - CDEBUG(D_NET, "<<<\n"); - return (0); -}; - -int -kptllnd_scheduler (void *arg) -{ - int id = (long)arg; - char name[16]; - wait_queue_t waitlink; - unsigned long flags; - int did_something; - int counter = 0; - kptl_rx_t *rx; - kptl_rx_buffer_t *rxb; - kptl_tx_t *tx; - - snprintf(name, sizeof(name), "kptllnd_sd_%02d", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&waitlink, current); - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - /* threads shut down in phase 2 after all peers have been destroyed */ - while (kptllnd_data.kptl_shutdown < 2) { - - did_something = 0; - - if (!list_empty(&kptllnd_data.kptl_sched_rxq)) { - rx = list_entry (kptllnd_data.kptl_sched_rxq.next, - kptl_rx_t, rx_list); - list_del(&rx->rx_list); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, - flags); - - kptllnd_rx_parse(rx); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - } - - if (!list_empty(&kptllnd_data.kptl_sched_rxbq)) { - rxb = list_entry (kptllnd_data.kptl_sched_rxbq.next, - kptl_rx_buffer_t, rxb_repost_list); - list_del(&rxb->rxb_repost_list); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, - flags); - - kptllnd_rx_buffer_post(rxb); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); 
- } - - if (!list_empty(&kptllnd_data.kptl_sched_txq)) { - tx = list_entry (kptllnd_data.kptl_sched_txq.next, - kptl_tx_t, tx_list); - list_del_init(&tx->tx_list); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - - kptllnd_tx_fini(tx); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - } - - if (did_something) { - if (++counter != *kptllnd_tunables.kptl_reschedule_loops) - continue; - } - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kptllnd_data.kptl_sched_waitq, - &waitlink); - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - - if (!did_something) - schedule(); - else - cond_resched(); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&kptllnd_data.kptl_sched_waitq, &waitlink); - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - counter = 0; - } - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - - kptllnd_thread_fini(); - return 0; -} - diff --git a/lnet/klnds/ptllnd/ptllnd_modparams.c b/lnet/klnds/ptllnd/ptllnd_modparams.c deleted file mode 100644 index 03134138e9422bd32d235c33df20720d0ecef95b..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_modparams.c +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - - -#include "ptllnd.h" - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of TX descriptors"); - -static int max_nodes = 1152; -CFS_MODULE_PARM(max_nodes, "i", int, 0444, - "maximum number of peer nodes"); - -static int max_procs_per_node = 2; -CFS_MODULE_PARM(max_procs_per_node, "i", int, 0444, - "maximum number of processes per peer node to cache"); - -static int checksum = 0; -CFS_MODULE_PARM(checksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int portal = PTLLND_PORTAL; /* <lnet/ptllnd_wire.h> */ -CFS_MODULE_PARM(portal, "i", int, 0444, - "portal id"); - -static int pid = PTLLND_PID; /* <lnet/ptllnd_wire.h> */ -CFS_MODULE_PARM(pid, "i", int, 0444, - "portals pid"); - -static int rxb_npages = 1; -CFS_MODULE_PARM(rxb_npages, "i", int, 0444, - "# of pages per rx buffer"); - -static int rxb_nspare = 8; -CFS_MODULE_PARM(rxb_nspare, "i", int, 0444, - "# of spare rx buffers"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "concurrent sends"); - -static int peercredits = PTLLND_PEERCREDITS; /* <lnet/ptllnd_wire.h> */ -CFS_MODULE_PARM(peercredits, "i", int, 0444, - "concurrent sends to 1 peer"); - -static int max_msg_size = PTLLND_MAX_KLND_MSG_SIZE; /* <lnet/ptllnd_wire.h> */ -CFS_MODULE_PARM(max_msg_size, "i", int, 0444, - "max size of immediate message"); - -static int peer_hash_table_size = 101; -CFS_MODULE_PARM(peer_hash_table_size, "i", int, 0444, - "# of slots in the peer hash table"); - -static int reschedule_loops = 100; -CFS_MODULE_PARM(reschedule_loops, "i", int, 0644, - "# of loops before scheduler does cond_resched()"); - -static int ack_puts = 0; -CFS_MODULE_PARM(ack_puts, "i", int, 0644, - "get portals to ack all PUTs"); - -#ifdef CRAY_XT3 -static int ptltrace_on_timeout = 1; -CFS_MODULE_PARM(ptltrace_on_timeout, "i", int, 0644, - "dump ptltrace on timeout"); - -static 
char *ptltrace_basename = "/tmp/lnet-ptltrace"; -CFS_MODULE_PARM(ptltrace_basename, "s", charp, 0644, - "ptltrace dump file basename"); -#endif -#ifdef PJK_DEBUGGING -static int simulation_bitmap = 0; -CFS_MODULE_PARM(simulation_bitmap, "i", int, 0444, - "simulation bitmap"); -#endif - - -kptl_tunables_t kptllnd_tunables = { - .kptl_ntx = &ntx, - .kptl_max_nodes = &max_nodes, - .kptl_max_procs_per_node = &max_procs_per_node, - .kptl_checksum = &checksum, - .kptl_portal = &portal, - .kptl_pid = &pid, - .kptl_timeout = &timeout, - .kptl_rxb_npages = &rxb_npages, - .kptl_rxb_nspare = &rxb_nspare, - .kptl_credits = &credits, - .kptl_peercredits = &peercredits, - .kptl_max_msg_size = &max_msg_size, - .kptl_peer_hash_table_size = &peer_hash_table_size, - .kptl_reschedule_loops = &reschedule_loops, - .kptl_ack_puts = &ack_puts, -#ifdef CRAY_XT3 - .kptl_ptltrace_on_timeout = &ptltrace_on_timeout, - .kptl_ptltrace_basename = &ptltrace_basename, -#endif -#ifdef PJK_DEBUGGING - .kptl_simulation_bitmap = &simulation_bitmap, -#endif -}; - - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -#ifdef CRAY_XT3 -static char ptltrace_basename_space[1024]; - -static void -kptllnd_init_strtunable(char **str_param, char *space, int size) -{ - strncpy(space, *str_param, size); - space[size - 1] = 0; - *str_param = space; -} -#endif - -static ctl_table kptllnd_ctl_table[] = { - {1, "ntx", &ntx, - sizeof(int), 0444, NULL, &proc_dointvec}, - {2, "max_nodes", &max_nodes, - sizeof(int), 0444, NULL, &proc_dointvec}, - {3, "max_procs_per_node", &max_procs_per_node, - sizeof(int), 0444, NULL, &proc_dointvec}, - {4, "checksum", &checksum, - sizeof(int), 0644, NULL, &proc_dointvec}, - {5, "timeout", &timeout, - sizeof(int), 0644, NULL, &proc_dointvec}, - {6, "portal", &portal, - sizeof(int), 0444, NULL, &proc_dointvec}, - {7, "pid", &pid, - sizeof(int), 0444, NULL, &proc_dointvec}, - {8, "rxb_npages", &rxb_npages, - sizeof(int), 0444, NULL, &proc_dointvec}, - {9, "credits", &credits, - sizeof(int), 
0444, NULL, &proc_dointvec}, - {10, "peercredits", &peercredits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {11, "max_msg_size", &max_msg_size, - sizeof(int), 0444, NULL, &proc_dointvec}, - {12, "peer_hash_table_size", &peer_hash_table_size, - sizeof(int), 0444, NULL, &proc_dointvec}, - {13, "reschedule_loops", &reschedule_loops, - sizeof(int), 0444, NULL, &proc_dointvec}, - {14, "ack_puts", &ack_puts, - sizeof(int), 0644, NULL, &proc_dointvec}, -#ifdef CRAY_XT3 - {15, "ptltrace_on_timeout", &ptltrace_on_timeout, - sizeof(int), 0644, NULL, &proc_dointvec}, - {16, "ptltrace_basename", ptltrace_basename_space, - sizeof(ptltrace_basename_space), 0644, NULL, &proc_dostring, - &sysctl_string}, -#endif -#ifdef PJK_DEBUGGING - {17, "simulation_bitmap", &simulation_bitmap, - sizeof(int), 0444, NULL, &proc_dointvec}, -#endif - - {0} -}; - -static ctl_table kptllnd_top_ctl_table[] = { - {203, "ptllnd", NULL, 0, 0555, kptllnd_ctl_table}, - {0} -}; - -int -kptllnd_tunables_init () -{ -#ifdef CRAY_XT3 - kptllnd_init_strtunable(&ptltrace_basename, - ptltrace_basename_space, - sizeof(ptltrace_basename_space)); -#endif - kptllnd_tunables.kptl_sysctl = - register_sysctl_table(kptllnd_top_ctl_table, 0); - - if (kptllnd_tunables.kptl_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kptllnd_tunables_fini () -{ - if (kptllnd_tunables.kptl_sysctl != NULL) - unregister_sysctl_table(kptllnd_tunables.kptl_sysctl); -} - -#else - -int -kptllnd_tunables_init () -{ - return 0; -} - -void -kptllnd_tunables_fini () -{ -} - -#endif - diff --git a/lnet/klnds/ptllnd/ptllnd_peer.c b/lnet/klnds/ptllnd/ptllnd_peer.c deleted file mode 100644 index c47fd0443dd537a66521a1fc604a6284205d3031..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_peer.c +++ /dev/null @@ -1,1278 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. 
All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * E Barton <eeb@bartonsoftware.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -#include "ptllnd.h" -#include <libcfs/list.h> - -static int -kptllnd_count_queue(struct list_head *q) -{ - struct list_head *e; - int n = 0; - - list_for_each(e, q) { - n++; - } - - return n; -} - -int -kptllnd_get_peer_info(int index, - lnet_process_id_t *id, - int *state, int *sent_hello, - int *refcount, __u64 *incarnation, - __u64 *next_matchbits, __u64 *last_matchbits_seen, - int *nsendq, int *nactiveq, - int *credits, int *outstanding_credits) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - unsigned long flags; - struct list_head *ptmp; - kptl_peer_t *peer; - int i; - int rc = -ENOENT; - - read_lock_irqsave(g_lock, flags); - - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) { - - list_for_each (ptmp, &kptllnd_data.kptl_peers[i]) { - peer = list_entry(ptmp, kptl_peer_t, peer_list); - - if (index-- > 0) - continue; - - *id = peer->peer_id; - *state = peer->peer_state; - *sent_hello = peer->peer_sent_hello; - *refcount = atomic_read(&peer->peer_refcount); - *incarnation = peer->peer_incarnation; - - spin_lock(&peer->peer_lock); - - *next_matchbits = peer->peer_next_matchbits; - *last_matchbits_seen = peer->peer_last_matchbits_seen; - *credits = peer->peer_credits; - *outstanding_credits = peer->peer_outstanding_credits; - - *nsendq = kptllnd_count_queue(&peer->peer_sendq); - *nactiveq = kptllnd_count_queue(&peer->peer_activeq); - - 
spin_unlock(&peer->peer_lock); - - rc = 0; - goto out; - } - } - - out: - read_unlock_irqrestore(g_lock, flags); - return rc; -} - -void -kptllnd_peer_add_peertable_locked (kptl_peer_t *peer) -{ - LASSERT (kptllnd_data.kptl_n_active_peers < - kptllnd_data.kptl_expected_peers); - - LASSERT (peer->peer_state == PEER_STATE_WAITING_HELLO || - peer->peer_state == PEER_STATE_ACTIVE); - - kptllnd_data.kptl_n_active_peers++; - atomic_inc(&peer->peer_refcount); /* +1 ref for the list */ - - /* NB add to HEAD of peer list for MRU order! - * (see kptllnd_cull_peertable) */ - list_add(&peer->peer_list, kptllnd_nid2peerlist(peer->peer_id.nid)); -} - -void -kptllnd_cull_peertable_locked (lnet_process_id_t pid) -{ - /* I'm about to add a new peer with this portals ID to the peer table, - * so (a) this peer should not exist already and (b) I want to leave at - * most (max_procs_per_nid - 1) peers with this NID in the table. */ - struct list_head *peers = kptllnd_nid2peerlist(pid.nid); - int cull_count = *kptllnd_tunables.kptl_max_procs_per_node; - int count; - struct list_head *tmp; - struct list_head *nxt; - kptl_peer_t *peer; - - count = 0; - list_for_each_safe (tmp, nxt, peers) { - /* NB I rely on kptllnd_peer_add_peertable_locked to add peers - * in MRU order */ - peer = list_entry(tmp, kptl_peer_t, peer_list); - - if (peer->peer_id.nid != pid.nid) - continue; - - LASSERT (peer->peer_id.pid != pid.pid); - - count++; - - if (count < cull_count) /* recent (don't cull) */ - continue; - - CDEBUG(D_NET, "Cull %s(%s)\n", - libcfs_id2str(peer->peer_id), - kptllnd_ptlid2str(peer->peer_ptlid)); - - kptllnd_peer_close_locked(peer, 0); - } -} - -kptl_peer_t * -kptllnd_peer_allocate (lnet_process_id_t lpid, ptl_process_id_t ppid) -{ - unsigned long flags; - kptl_peer_t *peer; - - LIBCFS_ALLOC(peer, sizeof (*peer)); - if (peer == NULL) { - CERROR("Can't create peer %s (%s)\n", - libcfs_id2str(lpid), - kptllnd_ptlid2str(ppid)); - return NULL; - } - - memset(peer, 0, sizeof(*peer)); /* zero 
flags etc */ - - INIT_LIST_HEAD (&peer->peer_sendq); - INIT_LIST_HEAD (&peer->peer_activeq); - spin_lock_init (&peer->peer_lock); - - peer->peer_state = PEER_STATE_ALLOCATED; - peer->peer_error = 0; - peer->peer_last_alive = cfs_time_current(); - peer->peer_id = lpid; - peer->peer_ptlid = ppid; - peer->peer_credits = 1; /* enough for HELLO */ - peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS; - peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peercredits - 1; - peer->peer_sent_credits = 1; /* HELLO credit is implicit */ - peer->peer_max_msg_size = PTLLND_MIN_BUFFER_SIZE; /* until we know better */ - - atomic_set(&peer->peer_refcount, 1); /* 1 ref for caller */ - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - peer->peer_myincarnation = kptllnd_data.kptl_incarnation; - - /* Only increase # peers under lock, to guarantee we dont grow it - * during shutdown */ - if (kptllnd_data.kptl_shutdown) { - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - LIBCFS_FREE(peer, sizeof(*peer)); - return NULL; - } - - kptllnd_data.kptl_npeers++; - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - return peer; -} - -void -kptllnd_peer_destroy (kptl_peer_t *peer) -{ - unsigned long flags; - - CDEBUG(D_NET, "Peer=%p\n", peer); - - LASSERT (!in_interrupt()); - LASSERT (atomic_read(&peer->peer_refcount) == 0); - LASSERT (peer->peer_state == PEER_STATE_ALLOCATED || - peer->peer_state == PEER_STATE_ZOMBIE); - LASSERT (list_empty(&peer->peer_sendq)); - LASSERT (list_empty(&peer->peer_activeq)); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - if (peer->peer_state == PEER_STATE_ZOMBIE) - list_del(&peer->peer_list); - - kptllnd_data.kptl_npeers--; - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - LIBCFS_FREE (peer, sizeof (*peer)); -} - -void -kptllnd_cancel_txlist (struct list_head *peerq, struct list_head *txs) -{ - struct list_head *tmp; - struct list_head *nxt; - kptl_tx_t *tx; - - 
list_for_each_safe (tmp, nxt, peerq) { - tx = list_entry(tmp, kptl_tx_t, tx_list); - - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, txs); - - tx->tx_status = -EIO; - tx->tx_active = 0; - } -} - -void -kptllnd_peer_cancel_txs(kptl_peer_t *peer, struct list_head *txs) -{ - unsigned long flags; - - spin_lock_irqsave(&peer->peer_lock, flags); - - kptllnd_cancel_txlist(&peer->peer_sendq, txs); - kptllnd_cancel_txlist(&peer->peer_activeq, txs); - - spin_unlock_irqrestore(&peer->peer_lock, flags); -} - -void -kptllnd_peer_alive (kptl_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->peer_last_alive = cfs_time_current(); - mb(); -} - -void -kptllnd_peer_notify (kptl_peer_t *peer) -{ - unsigned long flags; - time_t last_alive = 0; - int error = 0; - - spin_lock_irqsave(&peer->peer_lock, flags); - - if (peer->peer_error != 0) { - error = peer->peer_error; - peer->peer_error = 0; - - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->peer_last_alive); - } - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - if (error != 0) - lnet_notify (kptllnd_data.kptl_ni, peer->peer_id.nid, 0, - last_alive); -} - -void -kptllnd_handle_closing_peers () -{ - unsigned long flags; - struct list_head txs; - kptl_peer_t *peer; - struct list_head *tmp; - struct list_head *nxt; - kptl_tx_t *tx; - int idle; - - /* Check with a read lock first to avoid blocking anyone */ - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - idle = list_empty(&kptllnd_data.kptl_closing_peers) && - list_empty(&kptllnd_data.kptl_zombie_peers); - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - if (idle) - return; - - INIT_LIST_HEAD(&txs); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - /* Cancel txs on all zombie peers. NB anyone dropping the last peer - * ref removes it from this list, so I musn't drop the lock while - * scanning it. 
*/ - list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) { - peer = list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT (peer->peer_state == PEER_STATE_ZOMBIE); - - kptllnd_peer_cancel_txs(peer, &txs); - } - - /* Notify LNET and cancel txs on closing (i.e. newly closed) peers. NB - * I'm the only one removing from this list, but peers can be added on - * the end any time I drop the lock. */ - - list_for_each_safe (tmp, nxt, &kptllnd_data.kptl_closing_peers) { - peer = list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT (peer->peer_state == PEER_STATE_CLOSING); - - list_del(&peer->peer_list); - list_add_tail(&peer->peer_list, - &kptllnd_data.kptl_zombie_peers); - peer->peer_state = PEER_STATE_ZOMBIE; - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - kptllnd_peer_notify(peer); - kptllnd_peer_cancel_txs(peer, &txs); - kptllnd_peer_decref(peer); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - } - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - /* Drop peer's ref on all cancelled txs. This will get - * kptllnd_tx_fini() to abort outstanding comms if necessary. 
*/ - - list_for_each_safe (tmp, nxt, &txs) { - tx = list_entry(tmp, kptl_tx_t, tx_list); - list_del(&tx->tx_list); - kptllnd_tx_decref(tx); - } -} - -void -kptllnd_peer_close_locked(kptl_peer_t *peer, int why) -{ - switch (peer->peer_state) { - default: - LBUG(); - - case PEER_STATE_WAITING_HELLO: - case PEER_STATE_ACTIVE: - /* Ensure new peers see a new incarnation of me */ - LASSERT(peer->peer_myincarnation <= kptllnd_data.kptl_incarnation); - if (peer->peer_myincarnation == kptllnd_data.kptl_incarnation) - kptllnd_data.kptl_incarnation++; - - /* Removing from peer table */ - kptllnd_data.kptl_n_active_peers--; - LASSERT (kptllnd_data.kptl_n_active_peers >= 0); - - list_del(&peer->peer_list); - kptllnd_peer_unreserve_buffers(); - - peer->peer_error = why; /* stash 'why' only on first close */ - peer->peer_state = PEER_STATE_CLOSING; - - /* Schedule for immediate attention, taking peer table's ref */ - list_add_tail(&peer->peer_list, - &kptllnd_data.kptl_closing_peers); - wake_up(&kptllnd_data.kptl_watchdog_waitq); - break; - - case PEER_STATE_ZOMBIE: - case PEER_STATE_CLOSING: - break; - } -} - -void -kptllnd_peer_close(kptl_peer_t *peer, int why) -{ - unsigned long flags; - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - kptllnd_peer_close_locked(peer, why); - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); -} - -int -kptllnd_peer_del(lnet_process_id_t id) -{ - struct list_head *ptmp; - struct list_head *pnxt; - kptl_peer_t *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - /* - * Find the single bucket we are supposed to look at or if nid is a - * wildcard (LNET_NID_ANY) then look at all of the buckets - */ - if (id.nid != LNET_NID_ANY) { - struct list_head *l = kptllnd_nid2peerlist(id.nid); - - lo = hi = l - kptllnd_data.kptl_peers; - } else { - if (id.pid != LNET_PID_ANY) - return -EINVAL; - - lo = 0; - hi = kptllnd_data.kptl_peer_hash_size - 1; - } - -again: - 
read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kptllnd_data.kptl_peers[i]) { - peer = list_entry (ptmp, kptl_peer_t, peer_list); - - if (!(id.nid == LNET_NID_ANY || - (peer->peer_id.nid == id.nid && - (id.pid == LNET_PID_ANY || - peer->peer_id.pid == id.pid)))) - continue; - - kptllnd_peer_addref(peer); /* 1 ref for me... */ - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - kptllnd_peer_close(peer, 0); - kptllnd_peer_decref(peer); /* ...until here */ - - rc = 0; /* matched something */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - return (rc); -} - -void -kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag) -{ - /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */ - ptl_handle_md_t rdma_mdh = PTL_INVALID_HANDLE; - ptl_handle_md_t msg_mdh = PTL_INVALID_HANDLE; - ptl_handle_me_t meh; - ptl_md_t md; - ptl_err_t prc; - unsigned long flags; - - LASSERT (!tx->tx_idle); - LASSERT (!tx->tx_active); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (tx->tx_type == TX_TYPE_SMALL_MESSAGE || - tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_GET_REQUEST); - - kptllnd_set_tx_peer(tx, peer); - - if (tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_GET_REQUEST) { - - spin_lock_irqsave(&peer->peer_lock, flags); - - /* Assume 64-bit matchbits can't wrap */ - LASSERT (peer->peer_next_matchbits >= PTL_RESERVED_MATCHBITS); - tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits = - peer->peer_next_matchbits++; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - prc = PtlMEAttach(kptllnd_data.kptl_nih, - *kptllnd_tunables.kptl_portal, - peer->peer_ptlid, - tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits, - 0, /* ignore bits */ - PTL_UNLINK, - PTL_INS_BEFORE, - &meh); - if (prc 
!= PTL_OK) { - CERROR("PtlMEAttach(%s) failed: %d\n", - libcfs_id2str(peer->peer_id), prc); - goto failed; - } - - prc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK, &rdma_mdh); - if (prc != PTL_OK) { - CERROR("PtlMDAttach(%s) failed: %d\n", - libcfs_id2str(tx->tx_peer->peer_id), prc); - prc = PtlMEUnlink(meh); - LASSERT(prc == PTL_OK); - rdma_mdh = PTL_INVALID_HANDLE; - goto failed; - } - - /* I'm not racing with the event callback here. It's a bug if - * there's an event on the MD I just attached before I actually - * send the RDMA request message which the event callback - * catches by asserting 'rdma_mdh' is valid. */ - } - - memset(&md, 0, sizeof(md)); - - md.threshold = tx->tx_acked ? 2 : 1; /* SEND END + ACK? */ - md.options = PTL_MD_OP_PUT | - PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE; - md.user_ptr = &tx->tx_msg_eventarg; - md.eq_handle = kptllnd_data.kptl_eqh; - - if (nfrag == 0) { - md.start = tx->tx_msg; - md.length = tx->tx_msg->ptlm_nob; - } else { - LASSERT (nfrag > 1); - LASSERT (tx->tx_frags->iov[0].iov_base == (void *)tx->tx_msg); - - md.start = tx->tx_frags; - md.length = nfrag; - md.options |= PTL_MD_IOVEC; - } - - prc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &msg_mdh); - if (prc != PTL_OK) { - msg_mdh = PTL_INVALID_HANDLE; - goto failed; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - - tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ); - tx->tx_active = 1; - tx->tx_rdma_mdh = rdma_mdh; - tx->tx_msg_mdh = msg_mdh; - - /* Ensure HELLO is sent first */ - if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) - list_add(&tx->tx_list, &peer->peer_sendq); - else - list_add_tail(&tx->tx_list, &peer->peer_sendq); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - return; - - failed: - spin_lock_irqsave(&peer->peer_lock, flags); - - tx->tx_status = -EIO; - tx->tx_rdma_mdh = rdma_mdh; - tx->tx_msg_mdh = msg_mdh; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - kptllnd_tx_decref(tx); -} - 
-void -kptllnd_peer_check_sends (kptl_peer_t *peer) -{ - - kptl_tx_t *tx; - int rc; - unsigned long flags; - - LASSERT(!in_interrupt()); - - spin_lock_irqsave(&peer->peer_lock, flags); - - peer->peer_retry_noop = 0; - - if (list_empty(&peer->peer_sendq) && - peer->peer_outstanding_credits >= PTLLND_CREDIT_HIGHWATER && - peer->peer_credits != 0) { - - /* post a NOOP to return credits */ - spin_unlock_irqrestore(&peer->peer_lock, flags); - - tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (tx == NULL) { - CERROR("Can't return credits to %s: can't allocate descriptor\n", - libcfs_id2str(peer->peer_id)); - } else { - kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_NOOP, 0); - kptllnd_post_tx(peer, tx, 0); - } - - spin_lock_irqsave(&peer->peer_lock, flags); - peer->peer_retry_noop = (tx == NULL); - } - - while (!list_empty(&peer->peer_sendq)) { - tx = list_entry (peer->peer_sendq.next, kptl_tx_t, tx_list); - - LASSERT (tx->tx_active); - LASSERT (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (tx->tx_type == TX_TYPE_SMALL_MESSAGE || - !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - - LASSERT (peer->peer_outstanding_credits >= 0); - LASSERT (peer->peer_sent_credits >= 0); - LASSERT (peer->peer_sent_credits + - peer->peer_outstanding_credits <= - *kptllnd_tunables.kptl_peercredits); - LASSERT (peer->peer_credits >= 0); - - /* Ensure HELLO is sent first */ - if (!peer->peer_sent_hello) { - if (tx->tx_msg->ptlm_type != PTLLND_MSG_TYPE_HELLO) - break; - peer->peer_sent_hello = 1; - } - - if (peer->peer_credits == 0) { - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: no credits for %p\n", - libcfs_id2str(peer->peer_id), - peer->peer_credits, - peer->peer_outstanding_credits, - peer->peer_sent_credits, tx); - break; - } - - /* Don't use the last credit unless I've got credits to - * return */ - if (peer->peer_credits == 1 && - peer->peer_outstanding_credits == 0) { - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: " - "not using last credit for %p\n", - 
libcfs_id2str(peer->peer_id), - peer->peer_credits, - peer->peer_outstanding_credits, - peer->peer_sent_credits, tx); - break; - } - - list_del(&tx->tx_list); - - /* Discard any NOOP I queued if I'm not at the high-water mark - * any more or more messages have been queued */ - if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_NOOP && - (!list_empty(&peer->peer_sendq) || - peer->peer_outstanding_credits < PTLLND_CREDIT_HIGHWATER)) { - - tx->tx_active = 0; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_id2str(peer->peer_id)); - kptllnd_tx_decref(tx); - - spin_lock_irqsave(&peer->peer_lock, flags); - continue; - } - - /* fill last-minute msg header fields */ - kptllnd_msg_pack(tx->tx_msg, peer); - - peer->peer_sent_credits += peer->peer_outstanding_credits; - peer->peer_outstanding_credits = 0; - peer->peer_credits--; - - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s tx=%p nob=%d cred=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - kptllnd_msgtype2str(tx->tx_msg->ptlm_type), - tx, tx->tx_msg->ptlm_nob, - tx->tx_msg->ptlm_credits); - - list_add_tail(&tx->tx_list, &peer->peer_activeq); - - kptllnd_tx_addref(tx); /* 1 ref for me... */ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - tx->tx_tposted = jiffies; /* going on the wire */ - - rc = PtlPut (tx->tx_msg_mdh, - tx->tx_acked ? 
PTL_ACK_REQ : PTL_NOACK_REQ, - peer->peer_ptlid, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - LNET_MSG_MATCHBITS, - 0, /* offset */ - 0); /* header data */ - if (rc != PTL_OK) { - CERROR("PtlPut %s error %d\n", - libcfs_id2str(peer->peer_id), rc); - - /* Nuke everything (including this tx) */ - kptllnd_peer_close(peer, -EIO); - return; - } - - kptllnd_tx_decref(tx); /* drop my ref */ - - spin_lock_irqsave(&peer->peer_lock, flags); - } - - spin_unlock_irqrestore(&peer->peer_lock, flags); -} - -kptl_tx_t * -kptllnd_find_timed_out_tx(kptl_peer_t *peer) -{ - kptl_tx_t *tx; - struct list_head *tmp; - - list_for_each(tmp, &peer->peer_sendq) { - tx = list_entry(peer->peer_sendq.next, kptl_tx_t, tx_list); - - if (time_after_eq(jiffies, tx->tx_deadline)) { - kptllnd_tx_addref(tx); - return tx; - } - } - - list_for_each(tmp, &peer->peer_activeq) { - tx = list_entry(peer->peer_activeq.next, kptl_tx_t, tx_list); - - if (time_after_eq(jiffies, tx->tx_deadline)) { - kptllnd_tx_addref(tx); - return tx; - } - } - - return NULL; -} - - -void -kptllnd_peer_check_bucket (int idx, int stamp) -{ - struct list_head *peers = &kptllnd_data.kptl_peers[idx]; - struct list_head *ptmp; - kptl_peer_t *peer; - kptl_tx_t *tx; - unsigned long flags; - int nsend; - int nactive; - int check_sends; - - CDEBUG(D_NET, "Bucket=%d, stamp=%d\n", idx, stamp); - - again: - /* NB. 
Shared lock while I just look */ - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kptl_peer_t, peer_list); - - CDEBUG(D_NET, "Peer=%s Credits=%d Outstanding=%d Send=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits); - - spin_lock(&peer->peer_lock); - - if (peer->peer_check_stamp == stamp) { - /* checked already this pass */ - spin_unlock(&peer->peer_lock); - continue; - } - - peer->peer_check_stamp = stamp; - tx = kptllnd_find_timed_out_tx(peer); - check_sends = peer->peer_retry_noop; - - spin_unlock(&peer->peer_lock); - - if (tx == NULL && !check_sends) - continue; - - kptllnd_peer_addref(peer); /* 1 ref for me... */ - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - if (tx == NULL) { /* nothing timed out */ - kptllnd_peer_check_sends(peer); - kptllnd_peer_decref(peer); /* ...until here or... */ - - /* rescan after dropping the lock */ - goto again; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - nsend = kptllnd_count_queue(&peer->peer_sendq); - nactive = kptllnd_count_queue(&peer->peer_activeq); - spin_unlock_irqrestore(&peer->peer_lock, flags); - - LCONSOLE_ERROR("Timing out %s: %s\n", - libcfs_id2str(peer->peer_id), - (tx->tx_tposted == 0) ? - "no free peer buffers" : "please check Portals"); - - CERROR("%s timed out: cred %d outstanding %d, sent %d, " - "sendq %d, activeq %d Tx %p %s (%s%s%s) status %d " - "%sposted %lu T/O %ds\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - nsend, nactive, tx, kptllnd_tx_typestr(tx->tx_type), - tx->tx_active ? "A" : "", - PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) ? - "" : "M", - PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) ? - "" : "D", - tx->tx_status, - (tx->tx_tposted == 0) ? "not " : "", - (tx->tx_tposted == 0) ? 
0UL : (jiffies - tx->tx_tposted), - *kptllnd_tunables.kptl_timeout); - - kptllnd_dump_ptltrace(); - - kptllnd_tx_decref(tx); - - kptllnd_peer_close(peer, -ETIMEDOUT); - kptllnd_peer_decref(peer); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); -} - -kptl_peer_t * -kptllnd_id2peer_locked (lnet_process_id_t id) -{ - struct list_head *peers = kptllnd_nid2peerlist(id.nid); - struct list_head *tmp; - kptl_peer_t *peer; - - list_for_each (tmp, peers) { - - peer = list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT(peer->peer_state == PEER_STATE_WAITING_HELLO || - peer->peer_state == PEER_STATE_ACTIVE); - - if (peer->peer_id.nid != id.nid || - peer->peer_id.pid != id.pid) - continue; - - kptllnd_peer_addref(peer); - - CDEBUG(D_NET, "%s -> %s (%d)\n", - libcfs_id2str(id), - kptllnd_ptlid2str(peer->peer_ptlid), - atomic_read (&peer->peer_refcount)); - return peer; - } - - return NULL; -} - -void -kptllnd_peertable_overflow_msg(char *str, lnet_process_id_t id) -{ - LCONSOLE_ERROR("%s %s overflows the peer table[%d]: " - "messages may be dropped\n", - str, libcfs_id2str(id), - kptllnd_data.kptl_n_active_peers); - LCONSOLE_ERROR("Please correct by increasing " - "'max_nodes' or 'max_procs_per_node'\n"); -} - -__u64 -kptllnd_get_last_seen_matchbits_locked(lnet_process_id_t lpid) -{ - kptl_peer_t *peer; - struct list_head *tmp; - - /* Find the last matchbits I saw this new peer using. Note.. - A. This peer cannot be in the peer table - she's new! - B. If I can't find the peer in the closing/zombie peers, all - matchbits are safe because all refs to the (old) peer have gone - so all txs have completed so there's no risk of matchbit - collision! 
- */ - - LASSERT(kptllnd_id2peer_locked(lpid) == NULL); - - /* peer's last matchbits can't change after it comes out of the peer - * table, so first match is fine */ - - list_for_each (tmp, &kptllnd_data.kptl_closing_peers) { - peer = list_entry (tmp, kptl_peer_t, peer_list); - - if (peer->peer_id.nid == lpid.nid && - peer->peer_id.pid == lpid.pid) - return peer->peer_last_matchbits_seen; - } - - list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) { - peer = list_entry (tmp, kptl_peer_t, peer_list); - - if (peer->peer_id.nid == lpid.nid && - peer->peer_id.pid == lpid.pid) - return peer->peer_last_matchbits_seen; - } - - return PTL_RESERVED_MATCHBITS; -} - -kptl_peer_t * -kptllnd_peer_handle_hello (ptl_process_id_t initiator, - kptl_msg_t *msg) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - kptl_peer_t *peer; - kptl_peer_t *new_peer; - lnet_process_id_t lpid; - unsigned long flags; - kptl_tx_t *hello_tx; - int rc; - __u64 safe_matchbits; - __u64 last_matchbits_seen; - - lpid.nid = msg->ptlm_srcnid; - lpid.pid = msg->ptlm_srcpid; - - CDEBUG(D_NET, "hello from %s(%s)\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator)); - - if (initiator.pid != kptllnd_data.kptl_portals_id.pid && - (msg->ptlm_srcpid & LNET_PID_USERFLAG) == 0) { - /* If the peer's PID isn't _the_ ptllnd kernel pid, she must be - * userspace. Refuse the connection if she hasn't set the - * correct flag in her PID... */ - CERROR("Userflag not set in hello from %s (%s)\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator)); - return NULL; - } - - /* kptlhm_matchbits are the highest matchbits my peer may have used to - * RDMA to me. 
I ensure I never register buffers for RDMA that could - * match any she used */ - safe_matchbits = msg->ptlm_u.hello.kptlhm_matchbits + 1; - - if (safe_matchbits < PTL_RESERVED_MATCHBITS) { - CERROR("Illegal matchbits "LPX64" in HELLO from %s\n", - safe_matchbits, libcfs_id2str(lpid)); - return NULL; - } - - if (msg->ptlm_u.hello.kptlhm_max_msg_size < PTLLND_MIN_BUFFER_SIZE) { - CERROR("%s: max message size %d < MIN %d", - libcfs_id2str(lpid), - msg->ptlm_u.hello.kptlhm_max_msg_size, - *kptllnd_tunables.kptl_max_msg_size); - return NULL; - } - - if (msg->ptlm_credits <= 1) { - CERROR("Need more than 1+%d credits from %s\n", - msg->ptlm_credits, libcfs_id2str(lpid)); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - - peer = kptllnd_id2peer_locked(lpid); - if (peer != NULL) { - if (peer->peer_state == PEER_STATE_WAITING_HELLO) { - /* Completing HELLO handshake */ - LASSERT(peer->peer_incarnation == 0); - - if (msg->ptlm_dststamp != 0 && - msg->ptlm_dststamp != peer->peer_myincarnation) { - write_unlock_irqrestore(g_lock, flags); - - CERROR("Ignoring HELLO from %s: unexpected " - "dststamp "LPX64" ("LPX64" wanted)\n", - libcfs_id2str(lpid), - msg->ptlm_dststamp, - peer->peer_myincarnation); - kptllnd_peer_decref(peer); - return NULL; - } - - /* Concurrent initiation or response to my HELLO */ - peer->peer_state = PEER_STATE_ACTIVE; - peer->peer_incarnation = msg->ptlm_srcstamp; - peer->peer_next_matchbits = safe_matchbits; - peer->peer_max_msg_size = - msg->ptlm_u.hello.kptlhm_max_msg_size; - - write_unlock_irqrestore(g_lock, flags); - return peer; - } - - if (msg->ptlm_dststamp != 0 && - msg->ptlm_dststamp <= peer->peer_myincarnation) { - write_unlock_irqrestore(g_lock, flags); - - CERROR("Ignoring stale HELLO from %s: " - "dststamp "LPX64" (current "LPX64")\n", - libcfs_id2str(lpid), - msg->ptlm_dststamp, - peer->peer_myincarnation); - kptllnd_peer_decref(peer); - return NULL; - } - - /* Brand new connection attempt: remove old incarnation */ - 
kptllnd_peer_close_locked(peer, 0); - } - - kptllnd_cull_peertable_locked(lpid); - - write_unlock_irqrestore(g_lock, flags); - - if (peer != NULL) { - CDEBUG(D_NET, "Peer %s (%s) reconnecting:" - " stamp "LPX64"("LPX64")\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator), - msg->ptlm_srcstamp, peer->peer_incarnation); - - kptllnd_peer_decref(peer); - } - - hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (hello_tx == NULL) { - CERROR("Unable to allocate HELLO message for %s\n", - libcfs_id2str(lpid)); - return NULL; - } - - kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO, - sizeof(kptl_hello_msg_t)); - - new_peer = kptllnd_peer_allocate(lpid, initiator); - if (new_peer == NULL) { - kptllnd_tx_decref(hello_tx); - return NULL; - } - - rc = kptllnd_peer_reserve_buffers(); - if (rc != 0) { - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - - CERROR("Failed to reserve buffers for %s\n", - libcfs_id2str(lpid)); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - again: - peer = kptllnd_id2peer_locked(lpid); - if (peer != NULL) { - if (peer->peer_state == PEER_STATE_WAITING_HELLO) { - /* An outgoing message instantiated 'peer' for me */ - LASSERT(peer->peer_incarnation == 0); - - peer->peer_state = PEER_STATE_ACTIVE; - peer->peer_incarnation = msg->ptlm_srcstamp; - peer->peer_next_matchbits = safe_matchbits; - peer->peer_max_msg_size = - msg->ptlm_u.hello.kptlhm_max_msg_size; - - write_unlock_irqrestore(g_lock, flags); - - CWARN("Outgoing instantiated peer %s\n", - libcfs_id2str(lpid)); - } else { - LASSERT (peer->peer_state == PEER_STATE_ACTIVE); - - write_unlock_irqrestore(g_lock, flags); - - /* WOW! Somehow this peer completed the HELLO - * handshake while I slept. I guess I could have slept - * while it rebooted and sent a new HELLO, so I'll fail - * this one... */ - CWARN("Wow! 
peer %s\n", libcfs_id2str(lpid)); - kptllnd_peer_decref(peer); - peer = NULL; - } - - kptllnd_peer_unreserve_buffers(); - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - return peer; - } - - if (kptllnd_data.kptl_n_active_peers == - kptllnd_data.kptl_expected_peers) { - /* peer table full */ - write_unlock_irqrestore(g_lock, flags); - - kptllnd_peertable_overflow_msg("Connection from ", lpid); - - rc = kptllnd_reserve_buffers(1); /* HELLO headroom */ - if (rc != 0) { - CERROR("Refusing connection from %s\n", - libcfs_id2str(lpid)); - kptllnd_peer_unreserve_buffers(); - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - kptllnd_data.kptl_expected_peers++; - goto again; - } - - last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(lpid); - - hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen; - hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size = - *kptllnd_tunables.kptl_max_msg_size; - - new_peer->peer_state = PEER_STATE_ACTIVE; - new_peer->peer_incarnation = msg->ptlm_srcstamp; - new_peer->peer_next_matchbits = safe_matchbits; - new_peer->peer_last_matchbits_seen = last_matchbits_seen; - new_peer->peer_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size; - - kptllnd_peer_add_peertable_locked(new_peer); - - write_unlock_irqrestore(g_lock, flags); - - /* NB someone else could get in now and post a message before I post - * the HELLO, but post_tx/check_sends take care of that! 
*/ - - CDEBUG(D_NETTRACE, "%s: post response hello %p\n", - libcfs_id2str(new_peer->peer_id), hello_tx); - - kptllnd_post_tx(new_peer, hello_tx, 0); - kptllnd_peer_check_sends(new_peer); - - return new_peer; -} - -void -kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag) -{ - kptllnd_post_tx(peer, tx, nfrag); - kptllnd_peer_check_sends(peer); -} - -int -kptllnd_find_target(kptl_peer_t **peerp, lnet_process_id_t target) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - ptl_process_id_t ptl_id; - kptl_peer_t *new_peer; - kptl_tx_t *hello_tx; - unsigned long flags; - int rc; - __u64 last_matchbits_seen; - - /* I expect to find the peer, so I only take a read lock... */ - read_lock_irqsave(g_lock, flags); - *peerp = kptllnd_id2peer_locked(target); - read_unlock_irqrestore(g_lock, flags); - - if (*peerp != NULL) - return 0; - - if ((target.pid & LNET_PID_USERFLAG) != 0) { - CWARN("Refusing to create a new connection to %s " - "(non-kernel peer)\n", libcfs_id2str(target)); - return -EHOSTUNREACH; - } - - /* The new peer is a kernel ptllnd, and kernel ptllnds all have - * the same portals PID */ - ptl_id.nid = kptllnd_lnet2ptlnid(target.nid); - ptl_id.pid = kptllnd_data.kptl_portals_id.pid; - - hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (hello_tx == NULL) { - CERROR("Unable to allocate connect message for %s\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO, - sizeof(kptl_hello_msg_t)); - - new_peer = kptllnd_peer_allocate(target, ptl_id); - if (new_peer == NULL) { - rc = -ENOMEM; - goto unwind_0; - } - - rc = kptllnd_peer_reserve_buffers(); - if (rc != 0) - goto unwind_1; - - write_lock_irqsave(g_lock, flags); - again: - *peerp = kptllnd_id2peer_locked(target); - if (*peerp != NULL) { - write_unlock_irqrestore(g_lock, flags); - goto unwind_2; - } - - kptllnd_cull_peertable_locked(target); - - if (kptllnd_data.kptl_n_active_peers == - kptllnd_data.kptl_expected_peers) { - 
/* peer table full */ - write_unlock_irqrestore(g_lock, flags); - - kptllnd_peertable_overflow_msg("Connection to ", target); - - rc = kptllnd_reserve_buffers(1); /* HELLO headroom */ - if (rc != 0) { - CERROR("Can't create connection to %s\n", - libcfs_id2str(target)); - rc = -ENOMEM; - goto unwind_2; - } - write_lock_irqsave(g_lock, flags); - kptllnd_data.kptl_expected_peers++; - goto again; - } - - last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(target); - - hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen; - hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size = - *kptllnd_tunables.kptl_max_msg_size; - - new_peer->peer_state = PEER_STATE_WAITING_HELLO; - new_peer->peer_last_matchbits_seen = last_matchbits_seen; - - kptllnd_peer_add_peertable_locked(new_peer); - - write_unlock_irqrestore(g_lock, flags); - - /* NB someone else could get in now and post a message before I post - * the HELLO, but post_tx/check_sends take care of that! */ - - CDEBUG(D_NETTRACE, "%s: post initial hello %p\n", - libcfs_id2str(new_peer->peer_id), hello_tx); - - kptllnd_post_tx(new_peer, hello_tx, 0); - kptllnd_peer_check_sends(new_peer); - - *peerp = new_peer; - return 0; - - unwind_2: - kptllnd_peer_unreserve_buffers(); - unwind_1: - kptllnd_peer_decref(new_peer); - unwind_0: - kptllnd_tx_decref(hello_tx); - - return rc; -} diff --git a/lnet/klnds/ptllnd/ptllnd_ptltrace.c b/lnet/klnds/ptllnd/ptllnd_ptltrace.c deleted file mode 100644 index 30064dcbb83997e9cafb8c1c98d2b58a4bc4fcd9..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_ptltrace.c +++ /dev/null @@ -1,172 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc. All rights reserved. - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. 
- * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -#include "ptllnd.h" - -#ifdef CRAY_XT3 -static struct semaphore ptltrace_mutex; -static struct semaphore ptltrace_signal; - -void -kptllnd_ptltrace_to_file(char *filename) -{ - CFS_DECL_JOURNAL_DATA; - CFS_DECL_MMSPACE; - - cfs_file_t *filp; - char *start; - char *tmpbuf; - int len; - int rc; - loff_t offset = 0; - int eof = 0; - - CWARN("dumping ptltrace to %s\n", filename); - - LIBCFS_ALLOC(tmpbuf, PAGE_SIZE); - if (tmpbuf == NULL) { - CERROR("Can't allocate page buffer to dump %s\n", filename); - return; - } - - CFS_PUSH_JOURNAL; - - filp = cfs_filp_open(filename, - O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc); - if (filp == NULL) { - CERROR("Error %d creating %s\n", rc, filename); - goto out; - } - - CFS_MMSPACE_OPEN; - - while (!eof) { - start = NULL; - len = ptl_proc_read(tmpbuf, &start, offset, - PAGE_SIZE, &eof, NULL); - - /* we don't allow ptl_proc_read to mimic case 0 or 1 behavior - * for a proc_read method, only #2: from proc_file_read - * - * 2) Set *start = an address within the buffer. - * Put the data of the requested offset at *start. - * Return the number of bytes of data placed there. - * If this number is greater than zero and you - * didn't signal eof and the reader is prepared to - * take more data you will be called again with the - * requested offset advanced by the number of bytes - * absorbed. 
- */ - - if (len == 0) /* end of file */ - break; - - if (len < 0) { - CERROR("ptl_proc_read: error %d\n", len); - break; - } - - LASSERT (start >= tmpbuf && start + len <= tmpbuf + PAGE_SIZE); - - rc = cfs_filp_write(filp, start, len, cfs_filp_poff(filp)); - if (rc != len) { - if (rc < 0) - CERROR("Error %d writing %s\n", rc, filename); - else - CERROR("Partial write %d(%d) to %s\n", - rc, len, filename); - break; - } - - offset += len; - } - - CFS_MMSPACE_CLOSE; - - rc = cfs_filp_fsync(filp); - if (rc != 0) - CERROR("Error %d syncing %s\n", rc, filename); - - cfs_filp_close(filp); -out: - CFS_POP_JOURNAL; - LIBCFS_FREE(tmpbuf, PAGE_SIZE); -} - -int -kptllnd_dump_ptltrace_thread(void *arg) -{ - static char fname[1024]; - - libcfs_daemonize("ptltracedump"); - - /* serialise with other instances of me */ - mutex_down(&ptltrace_mutex); - - snprintf(fname, sizeof(fname), "%s.%ld.%ld", - *kptllnd_tunables.kptl_ptltrace_basename, - cfs_time_current_sec(), (long)arg); - - kptllnd_ptltrace_to_file(fname); - - mutex_up(&ptltrace_mutex); - - /* unblock my creator */ - mutex_up(&ptltrace_signal); - - return 0; -} - -void -kptllnd_dump_ptltrace(void) -{ - int rc; - - if (!*kptllnd_tunables.kptl_ptltrace_on_timeout) - return; - - rc = cfs_kernel_thread(kptllnd_dump_ptltrace_thread, - (void *)(long)cfs_curproc_pid(), - CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { - CERROR("Error %d starting ptltrace dump thread\n", rc); - } else { - /* block until thread completes */ - mutex_down(&ptltrace_signal); - } -} - -void -kptllnd_init_ptltrace(void) -{ - init_mutex(&ptltrace_mutex); - init_mutex_locked(&ptltrace_signal); -} - -#else - -void -kptllnd_dump_ptltrace(void) -{ -} - -void -kptllnd_init_ptltrace(void) -{ -} - -#endif diff --git a/lnet/klnds/ptllnd/ptllnd_rx_buf.c b/lnet/klnds/ptllnd/ptllnd_rx_buf.c deleted file mode 100644 index e897086a2691c85d7f694e68c1a59064e198e7ca..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_rx_buf.c +++ /dev/null @@ 
-1,722 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - - #include "ptllnd.h" - -void -kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp) -{ - memset(rxbp, 0, sizeof(*rxbp)); - spin_lock_init(&rxbp->rxbp_lock); - INIT_LIST_HEAD(&rxbp->rxbp_list); -} - -void -kptllnd_rx_buffer_destroy(kptl_rx_buffer_t *rxb) -{ - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - - LASSERT(rxb->rxb_refcount == 0); - LASSERT(PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE)); - LASSERT(!rxb->rxb_posted); - LASSERT(rxb->rxb_idle); - - list_del(&rxb->rxb_list); - rxbp->rxbp_count--; - - LIBCFS_FREE(rxb->rxb_buffer, kptllnd_rx_buffer_size()); - LIBCFS_FREE(rxb, sizeof(*rxb)); -} - -int -kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count) -{ - int bufsize; - int msgs_per_buffer; - int rc; - kptl_rx_buffer_t *rxb; - char *buffer; - unsigned long flags; - - bufsize = kptllnd_rx_buffer_size(); - msgs_per_buffer = bufsize / (*kptllnd_tunables.kptl_max_msg_size); - - CDEBUG(D_NET, "kptllnd_rx_buffer_pool_reserve(%d)\n", count); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - for (;;) { - if (rxbp->rxbp_shutdown) { - rc = -ESHUTDOWN; - break; - } - - if (rxbp->rxbp_reserved + count <= - rxbp->rxbp_count * msgs_per_buffer) { - rc = 0; - break; - } - - spin_unlock_irqrestore(&rxbp->rxbp_lock, 
flags); - - LIBCFS_ALLOC(rxb, sizeof(*rxb)); - LIBCFS_ALLOC(buffer, bufsize); - - if (rxb == NULL || buffer == NULL) { - CERROR("Failed to allocate rx buffer\n"); - - if (rxb != NULL) - LIBCFS_FREE(rxb, sizeof(*rxb)); - if (buffer != NULL) - LIBCFS_FREE(buffer, bufsize); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rc = -ENOMEM; - break; - } - - memset(rxb, 0, sizeof(*rxb)); - - rxb->rxb_eventarg.eva_type = PTLLND_EVENTARG_TYPE_BUF; - rxb->rxb_refcount = 0; - rxb->rxb_pool = rxbp; - rxb->rxb_idle = 0; - rxb->rxb_posted = 0; - rxb->rxb_buffer = buffer; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - if (rxbp->rxbp_shutdown) { - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - LIBCFS_FREE(rxb, sizeof(*rxb)); - LIBCFS_FREE(buffer, bufsize); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rc = -ESHUTDOWN; - break; - } - - list_add_tail(&rxb->rxb_list, &rxbp->rxbp_list); - rxbp->rxbp_count++; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - kptllnd_rx_buffer_post(rxb); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - } - - if (rc == 0) - rxbp->rxbp_reserved += count; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - return rc; -} - -void -kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp, - int count) -{ - unsigned long flags; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - CDEBUG(D_NET, "kptllnd_rx_buffer_pool_unreserve(%d)\n", count); - rxbp->rxbp_reserved -= count; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -void -kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp) -{ - kptl_rx_buffer_t *rxb; - int rc; - int i; - unsigned long flags; - struct list_head *tmp; - struct list_head *nxt; - ptl_handle_md_t mdh; - - /* CAVEAT EMPTOR: I'm racing with everything here!!! - * - * Buffers can still be posted after I set rxbp_shutdown because I - * can't hold rxbp_lock while I'm posting them. - * - * Calling PtlMDUnlink() here races with auto-unlinks; i.e. 
a buffer's - * MD handle could become invalid under me. I am vulnerable to portals - * re-using handles (i.e. make the same handle valid again, but for a - * different MD) from when the MD is actually unlinked, to when the - * event callback tells me it has been unlinked. */ - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - rxbp->rxbp_shutdown = 1; - - for (i = 9;; i++) { - list_for_each_safe(tmp, nxt, &rxbp->rxbp_list) { - rxb = list_entry (tmp, kptl_rx_buffer_t, rxb_list); - - if (rxb->rxb_idle) { - spin_unlock_irqrestore(&rxbp->rxbp_lock, - flags); - kptllnd_rx_buffer_destroy(rxb); - spin_lock_irqsave(&rxbp->rxbp_lock, - flags); - continue; - } - - mdh = rxb->rxb_mdh; - if (PtlHandleIsEqual(mdh, PTL_INVALID_HANDLE)) - continue; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - rc = PtlMDUnlink(mdh); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - /* callback clears rxb_mdh and drops net's ref - * (which causes repost, but since I set - * shutdown, it will just set the buffer - * idle) */ -#else - if (rc == PTL_OK) { - rxb->rxb_posted = 0; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - kptllnd_rx_buffer_decref_locked(rxb); - } -#endif - } - - if (list_empty(&rxbp->rxbp_list)) - break; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - /* Wait a bit for references to be dropped */ - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "Waiting for %d Busy RX Buffers\n", - rxbp->rxbp_count); - - cfs_pause(cfs_time_seconds(1)); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - } - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -void -kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb) -{ - int rc; - ptl_md_t md; - ptl_handle_me_t meh; - ptl_handle_md_t mdh; - ptl_process_id_t any; - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (rxb->rxb_refcount == 0); - LASSERT (!rxb->rxb_idle); - LASSERT (!rxb->rxb_posted); - LASSERT (PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE)); - - any.nid = PTL_NID_ANY; - any.pid = PTL_PID_ANY; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - if (rxbp->rxbp_shutdown) { - rxb->rxb_idle = 1; - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - return; - } - - rxb->rxb_refcount = 1; /* net's ref */ - rxb->rxb_posted = 1; /* I'm posting */ - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - rc = PtlMEAttach(kptllnd_data.kptl_nih, - *kptllnd_tunables.kptl_portal, - any, - LNET_MSG_MATCHBITS, - 0, /* all matchbits are valid - ignore none */ - PTL_UNLINK, - PTL_INS_AFTER, - &meh); - if (rc != PTL_OK) { - CERROR("PtlMeAttach rxb failed %d\n", rc); - goto failed; - } - - /* - * Setup MD - */ - md.start = rxb->rxb_buffer; - md.length = PAGE_SIZE * *kptllnd_tunables.kptl_rxb_npages; - md.threshold = PTL_MD_THRESH_INF; - md.options = PTL_MD_OP_PUT | - PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE | - PTL_MD_MAX_SIZE | - PTL_MD_LOCAL_ALIGN8; - md.user_ptr = &rxb->rxb_eventarg; - md.max_size = *kptllnd_tunables.kptl_max_msg_size; - md.eq_handle = kptllnd_data.kptl_eqh; - - rc = PtlMDAttach(meh, md, PTL_UNLINK, &mdh); - if (rc == PTL_OK) { - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - if (rxb->rxb_posted) /* Not auto-unlinked yet!!! 
*/ - rxb->rxb_mdh = mdh; - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - return; - } - - CERROR("PtlMDAttach rxb failed %d\n", rc); - rc = PtlMEUnlink(meh); - LASSERT(rc == PTL_OK); - - failed: - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rxb->rxb_posted = 0; - /* XXX this will just try again immediately */ - kptllnd_rx_buffer_decref_locked(rxb); - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -kptl_rx_t * -kptllnd_rx_alloc(void) -{ - kptl_rx_t* rx; - - if (IS_SIMULATION_ENABLED(FAIL_RX_ALLOC)) { - CERROR ("FAIL_RX_ALLOC SIMULATION triggered\n"); - return NULL; - } - - rx = cfs_mem_cache_alloc(kptllnd_data.kptl_rx_cache, CFS_ALLOC_ATOMIC); - if (rx == NULL) { - CERROR("Failed to allocate rx\n"); - return NULL; - } - - memset(rx, 0, sizeof(*rx)); - return rx; -} - -void -kptllnd_rx_done(kptl_rx_t *rx) -{ - kptl_rx_buffer_t *rxb = rx->rx_rxb; - kptl_peer_t *peer = rx->rx_peer; - unsigned long flags; - - CDEBUG(D_NET, "rx=%p rxb %p peer %p\n", rx, rxb, peer); - - if (rxb != NULL) - kptllnd_rx_buffer_decref(rxb); - - if (peer != NULL) { - /* Update credits (after I've decref-ed the buffer) */ - spin_lock_irqsave(&peer->peer_lock, flags); - - peer->peer_outstanding_credits++; - LASSERT (peer->peer_outstanding_credits + - peer->peer_sent_credits <= - *kptllnd_tunables.kptl_peercredits); - - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - rx); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* I might have to send back credits */ - kptllnd_peer_check_sends(peer); - kptllnd_peer_decref(peer); - } - - cfs_mem_cache_free(kptllnd_data.kptl_rx_cache, rx); -} - -void -kptllnd_rx_buffer_callback (ptl_event_t *ev) -{ - kptl_eventarg_t *eva = ev->md.user_ptr; - kptl_rx_buffer_t *rxb = kptllnd_eventarg2obj(eva); - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - kptl_rx_t *rx; - int unlinked; - unsigned long flags; - -#ifdef 
LUSTRE_PORTALS_UNLINK_SEMANTICS - unlinked = ev->unlinked; -#else - unlinked = ev->type == PTL_EVENT_UNLINK; -#endif - - CDEBUG(D_NET, "%s: %s(%d) rxb=%p fail=%s(%d) unlink=%d\n", - kptllnd_ptlid2str(ev->initiator), - kptllnd_evtype2str(ev->type), ev->type, rxb, - kptllnd_errtype2str(ev->ni_fail_type), ev->ni_fail_type, - unlinked); - - LASSERT (!rxb->rxb_idle); - LASSERT (ev->md.start == rxb->rxb_buffer); - LASSERT (ev->offset + ev->mlength <= - PAGE_SIZE * *kptllnd_tunables.kptl_rxb_npages); - LASSERT (ev->type == PTL_EVENT_PUT_END || - ev->type == PTL_EVENT_UNLINK); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->match_bits == LNET_MSG_MATCHBITS); - - if (ev->ni_fail_type != PTL_NI_OK) - CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn", - kptllnd_ptlid2str(ev->initiator), - kptllnd_evtype2str(ev->type), ev->type, rxb, - kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - - if (ev->type == PTL_EVENT_PUT_END && - ev->ni_fail_type == PTL_NI_OK && - !rxbp->rxbp_shutdown) { - - /* rxbp_shutdown sampled without locking! I only treat it as a - * hint since shutdown can start while rx's are queued on - * kptl_sched_rxq. */ -#if (PTL_MD_LOCAL_ALIGN8 == 0) - /* Portals can't force message alignment - someone sending an - * odd-length message will misalign subsequent messages and - * force the fixup below... 
*/ - if ((ev->mlength & 7) != 0) - CWARN("Message from %s has odd length %llu: " - "probable version incompatibility\n", - kptllnd_ptlid2str(ev->initiator), - ev->mlength); -#endif - rx = kptllnd_rx_alloc(); - if (rx == NULL) { - CERROR("Message from %s dropped: ENOMEM", - kptllnd_ptlid2str(ev->initiator)); - } else { - if ((ev->offset & 7) == 0) { - kptllnd_rx_buffer_addref(rxb); - rx->rx_rxb = rxb; - rx->rx_nob = ev->mlength; - rx->rx_msg = (kptl_msg_t *) - (rxb->rxb_buffer + ev->offset); - } else { -#if (PTL_MD_LOCAL_ALIGN8 == 0) - /* Portals can't force alignment - copy into - * rx_space (avoiding overflow) to fix */ - int maxlen = *kptllnd_tunables.kptl_max_msg_size; - - rx->rx_rxb = NULL; - rx->rx_nob = MIN(maxlen, ev->mlength); - rx->rx_msg = (kptl_msg_t *)rx->rx_space; - memcpy(rx->rx_msg, rxb->rxb_buffer + ev->offset, - rx->rx_nob); -#else - /* Portals should have forced the alignment */ - LBUG(); -#endif - } - - rx->rx_initiator = ev->initiator; - rx->rx_treceived = jiffies; -#ifdef CRAY_XT3 - rx->rx_uid = ev->uid; -#endif - /* Queue for attention */ - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, - flags); - - list_add_tail(&rx->rx_list, - &kptllnd_data.kptl_sched_rxq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, - flags); - } - } - - if (unlinked) { - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - rxb->rxb_posted = 0; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - kptllnd_rx_buffer_decref_locked(rxb); - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - } -} - -void -kptllnd_nak (kptl_rx_t *rx) -{ - /* Fire-and-forget a stub message that will let the peer know my - * protocol magic/version and make her drop/refresh any peer state she - * might have with me. 
*/ - ptl_md_t md = { - .start = kptllnd_data.kptl_nak_msg, - .length = kptllnd_data.kptl_nak_msg->ptlm_nob, - .threshold = 1, - .options = 0, - .user_ptr = NULL, - .eq_handle = PTL_EQ_NONE}; - ptl_handle_md_t mdh; - int rc; - - rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh); - if (rc != PTL_OK) { - CWARN("Can't NAK %s: bind failed %d\n", - kptllnd_ptlid2str(rx->rx_initiator), rc); - return; - } - - rc = PtlPut(mdh, PTL_NOACK_REQ, rx->rx_initiator, - *kptllnd_tunables.kptl_portal, 0, - LNET_MSG_MATCHBITS, 0, 0); - - if (rc != PTL_OK) - CWARN("Can't NAK %s: put failed %d\n", - kptllnd_ptlid2str(rx->rx_initiator), rc); -} - -void -kptllnd_rx_parse(kptl_rx_t *rx) -{ - kptl_msg_t *msg = rx->rx_msg; - kptl_peer_t *peer; - int rc; - unsigned long flags; - lnet_process_id_t srcid; - - LASSERT (rx->rx_peer == NULL); - - if ((rx->rx_nob >= 4 && - (msg->ptlm_magic == LNET_PROTO_MAGIC || - msg->ptlm_magic == __swab32(LNET_PROTO_MAGIC))) || - (rx->rx_nob >= 6 && - ((msg->ptlm_magic == PTLLND_MSG_MAGIC && - msg->ptlm_version != PTLLND_MSG_VERSION) || - (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC) && - msg->ptlm_version != __swab16(PTLLND_MSG_VERSION))))) { - /* NAK incompatible versions - * See other LNDs for how to handle this if/when ptllnd begins - * to allow different versions to co-exist */ - CERROR("Bad version: got %04x expected %04x from %s\n", - (__u32)(msg->ptlm_magic == PTLLND_MSG_MAGIC ? 
- msg->ptlm_version : __swab16(msg->ptlm_version)), - PTLLND_MSG_VERSION, kptllnd_ptlid2str(rx->rx_initiator)); - kptllnd_nak(rx); - goto rx_done; - } - - rc = kptllnd_msg_unpack(msg, rx->rx_nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - srcid.nid = msg->ptlm_srcnid; - srcid.pid = msg->ptlm_srcpid; - - CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks\n", - libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type), - msg->ptlm_credits, rx, rx->rx_rxb, jiffies - rx->rx_treceived); - - if (srcid.nid != kptllnd_ptl2lnetnid(rx->rx_initiator.nid)) { - CERROR("Bad source id %s from %s\n", - libcfs_id2str(srcid), - kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) { - peer = kptllnd_id2peer(srcid); - if (peer == NULL) - goto rx_done; - - CWARN("NAK from %s (%s)\n", - libcfs_id2str(srcid), - kptllnd_ptlid2str(rx->rx_initiator)); - - rc = -EPROTO; - goto failed; - } - - if (msg->ptlm_dstnid != kptllnd_data.kptl_ni->ni_nid || - msg->ptlm_dstpid != the_lnet.ln_pid) { - CERROR("Bad dstid %s (expected %s) from %s\n", - libcfs_id2str((lnet_process_id_t) { - .nid = msg->ptlm_dstnid, - .pid = msg->ptlm_dstpid}), - libcfs_id2str((lnet_process_id_t) { - .nid = kptllnd_data.kptl_ni->ni_nid, - .pid = the_lnet.ln_pid}), - kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) { - peer = kptllnd_peer_handle_hello(rx->rx_initiator, msg); - if (peer == NULL) - goto rx_done; - } else { - peer = kptllnd_id2peer(srcid); - if (peer == NULL) { - CWARN("NAK %s: no connection; peer must reconnect\n", - libcfs_id2str(srcid)); - /* NAK to make the peer reconnect */ - kptllnd_nak(rx); - goto rx_done; - } - - /* Ignore anything apart from HELLO while I'm waiting for it and - * any messages for a previous incarnation of the connection */ - if (peer->peer_state == PEER_STATE_WAITING_HELLO || - 
msg->ptlm_dststamp < peer->peer_myincarnation) { - kptllnd_peer_decref(peer); - goto rx_done; - } - - if (msg->ptlm_srcstamp != peer->peer_incarnation) { - CERROR("%s: Unexpected srcstamp "LPX64" " - "("LPX64" expected)\n", - libcfs_id2str(peer->peer_id), - msg->ptlm_srcstamp, - peer->peer_incarnation); - rc = -EPROTO; - goto failed; - } - - if (msg->ptlm_dststamp != peer->peer_myincarnation) { - CERROR("%s: Unexpected dststamp "LPX64" " - "("LPX64" expected)\n", - libcfs_id2str(peer->peer_id), msg->ptlm_dststamp, - peer->peer_myincarnation); - rc = -EPROTO; - goto failed; - } - } - - LASSERT (msg->ptlm_srcnid == peer->peer_id.nid && - msg->ptlm_srcpid == peer->peer_id.pid); - - spin_lock_irqsave(&peer->peer_lock, flags); - - /* Check peer only sends when I've sent her credits */ - if (peer->peer_sent_credits == 0) { - int c = peer->peer_credits; - int oc = peer->peer_outstanding_credits; - int sc = peer->peer_sent_credits; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - CERROR("%s: buffer overrun [%d/%d+%d]\n", - libcfs_id2str(peer->peer_id), c, sc, oc); - goto failed; - } - peer->peer_sent_credits--; - - /* No check for credit overflow - the peer may post new - * buffers after the startup handshake. 
*/ - peer->peer_credits += msg->ptlm_credits; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* See if something can go out now that credits have come in */ - if (msg->ptlm_credits != 0) - kptllnd_peer_check_sends(peer); - - /* ptllnd-level protocol correct - rx takes my ref on peer and increments - * peer_outstanding_credits when it completes */ - rx->rx_peer = peer; - kptllnd_peer_alive(peer); - - switch (msg->ptlm_type) { - default: - /* already checked by kptllnd_msg_unpack() */ - LBUG(); - - case PTLLND_MSG_TYPE_HELLO: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_HELLO\n"); - goto rx_done; - - case PTLLND_MSG_TYPE_NOOP: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_NOOP\n"); - goto rx_done; - - case PTLLND_MSG_TYPE_IMMEDIATE: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n"); - rc = lnet_parse(kptllnd_data.kptl_ni, - &msg->ptlm_u.immediate.kptlim_hdr, - msg->ptlm_srcnid, - rx, 0); - if (rc >= 0) /* kptllnd_recv owns 'rx' now */ - return; - goto failed; - - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n", - msg->ptlm_type == PTLLND_MSG_TYPE_PUT ? 
- "PUT" : "GET"); - - /* checked in kptllnd_msg_unpack() */ - LASSERT (msg->ptlm_u.rdma.kptlrm_matchbits >= - PTL_RESERVED_MATCHBITS); - - /* Update last match bits seen */ - spin_lock_irqsave(&peer->peer_lock, flags); - - if (msg->ptlm_u.rdma.kptlrm_matchbits > - rx->rx_peer->peer_last_matchbits_seen) - rx->rx_peer->peer_last_matchbits_seen = - msg->ptlm_u.rdma.kptlrm_matchbits; - - spin_unlock_irqrestore(&rx->rx_peer->peer_lock, flags); - - rc = lnet_parse(kptllnd_data.kptl_ni, - &msg->ptlm_u.rdma.kptlrm_hdr, - msg->ptlm_srcnid, - rx, 1); - if (rc >= 0) /* kptllnd_recv owns 'rx' now */ - return; - goto failed; - } - - failed: - kptllnd_peer_close(peer, rc); - if (rx->rx_peer == NULL) /* drop ref on peer */ - kptllnd_peer_decref(peer); /* unless rx_done will */ - rx_done: - kptllnd_rx_done(rx); -} diff --git a/lnet/klnds/ptllnd/ptllnd_tx.c b/lnet/klnds/ptllnd/ptllnd_tx.c deleted file mode 100644 index 814a7d91341694a2e49997515564f2094b2ef044..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/ptllnd_tx.c +++ /dev/null @@ -1,507 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - - #include "ptllnd.h" - -void -kptllnd_free_tx(kptl_tx_t *tx) -{ - if (tx->tx_msg != NULL) - LIBCFS_FREE(tx->tx_msg, sizeof(*tx->tx_msg)); - - if (tx->tx_frags != NULL) - LIBCFS_FREE(tx->tx_frags, sizeof(*tx->tx_frags)); - - LIBCFS_FREE(tx, sizeof(*tx)); - - atomic_dec(&kptllnd_data.kptl_ntx); - - /* Keep the tunable in step for visibility */ - *kptllnd_tunables.kptl_ntx = atomic_read(&kptllnd_data.kptl_ntx); -} - -kptl_tx_t * -kptllnd_alloc_tx(void) -{ - kptl_tx_t *tx; - - LIBCFS_ALLOC(tx, sizeof(*tx)); - if (tx == NULL) { - CERROR("Failed to allocate TX\n"); - return NULL; - } - - atomic_inc(&kptllnd_data.kptl_ntx); - - /* Keep the tunable in step for visibility */ - *kptllnd_tunables.kptl_ntx = atomic_read(&kptllnd_data.kptl_ntx); - - tx->tx_idle = 1; - tx->tx_rdma_mdh = PTL_INVALID_HANDLE; - tx->tx_msg_mdh = PTL_INVALID_HANDLE; - tx->tx_rdma_eventarg.eva_type = PTLLND_EVENTARG_TYPE_RDMA; - tx->tx_msg_eventarg.eva_type = PTLLND_EVENTARG_TYPE_MSG; - tx->tx_msg = NULL; - tx->tx_frags = NULL; - - LIBCFS_ALLOC(tx->tx_msg, sizeof(*tx->tx_msg)); - if (tx->tx_msg == NULL) { - CERROR("Failed to allocate TX payload\n"); - goto failed; - } - - LIBCFS_ALLOC(tx->tx_frags, sizeof(*tx->tx_frags)); - if (tx->tx_frags == NULL) { - CERROR("Failed to allocate TX frags\n"); - goto failed; - } - - return tx; - - failed: - kptllnd_free_tx(tx); - return NULL; -} - -int -kptllnd_setup_tx_descs() -{ - int n = *kptllnd_tunables.kptl_ntx; - int i; - - for (i = 0; i < n; i++) { - kptl_tx_t *tx = kptllnd_alloc_tx(); - - if (tx == NULL) - return -ENOMEM; - - spin_lock(&kptllnd_data.kptl_tx_lock); - - list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs); - - spin_unlock(&kptllnd_data.kptl_tx_lock); - } - - return 0; -} - -void -kptllnd_cleanup_tx_descs() -{ - kptl_tx_t *tx; - - /* No locking; single threaded now */ - LASSERT (kptllnd_data.kptl_shutdown == 2); - - while (!list_empty(&kptllnd_data.kptl_idle_txs)) { - tx = list_entry(kptllnd_data.kptl_idle_txs.next, - kptl_tx_t, 
tx_list); - - list_del(&tx->tx_list); - kptllnd_free_tx(tx); - } - - LASSERT (atomic_read(&kptllnd_data.kptl_ntx) == 0); -} - -kptl_tx_t * -kptllnd_get_idle_tx(enum kptl_tx_type type) -{ - kptl_tx_t *tx = NULL; - - if (IS_SIMULATION_ENABLED(FAIL_TX_PUT_ALLOC) && - type == TX_TYPE_PUT_REQUEST) { - CERROR("FAIL_TX_PUT_ALLOC SIMULATION triggered\n"); - return NULL; - } - - if (IS_SIMULATION_ENABLED(FAIL_TX_GET_ALLOC) && - type == TX_TYPE_GET_REQUEST) { - CERROR ("FAIL_TX_GET_ALLOC SIMULATION triggered\n"); - return NULL; - } - - if (IS_SIMULATION_ENABLED(FAIL_TX)) { - CERROR ("FAIL_TX SIMULATION triggered\n"); - return NULL; - } - - spin_lock(&kptllnd_data.kptl_tx_lock); - - if (list_empty (&kptllnd_data.kptl_idle_txs)) { - spin_unlock(&kptllnd_data.kptl_tx_lock); - - tx = kptllnd_alloc_tx(); - if (tx == NULL) - return NULL; - } else { - tx = list_entry(kptllnd_data.kptl_idle_txs.next, - kptl_tx_t, tx_list); - list_del(&tx->tx_list); - - spin_unlock(&kptllnd_data.kptl_tx_lock); - } - - LASSERT (atomic_read(&tx->tx_refcount)== 0); - LASSERT (tx->tx_idle); - LASSERT (!tx->tx_active); - LASSERT (tx->tx_lnet_msg == NULL); - LASSERT (tx->tx_lnet_replymsg == NULL); - LASSERT (tx->tx_peer == NULL); - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - - tx->tx_type = type; - atomic_set(&tx->tx_refcount, 1); - tx->tx_status = 0; - tx->tx_idle = 0; - tx->tx_tposted = 0; - tx->tx_acked = *kptllnd_tunables.kptl_ack_puts; - - CDEBUG(D_NET, "tx=%p\n", tx); - return tx; -} - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS -int -kptllnd_tx_abort_netio(kptl_tx_t *tx) -{ - kptl_peer_t *peer = tx->tx_peer; - ptl_handle_md_t msg_mdh; - ptl_handle_md_t rdma_mdh; - unsigned long flags; - - LASSERT (atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_active); - - spin_lock_irqsave(&peer->peer_lock, flags); - - msg_mdh = tx->tx_msg_mdh; - rdma_mdh = tx->tx_rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, 
PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* Uncompleted comms: there must have been some error and it must be - * propagated to LNET... */ - LASSERT (tx->tx_status != 0 || - (tx->tx_lnet_msg == NULL && - tx->tx_lnet_replymsg == NULL)); - - /* stash the tx on its peer until it completes */ - atomic_set(&tx->tx_refcount, 1); - tx->tx_active = 1; - list_add_tail(&tx->tx_list, &peer->peer_activeq); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* These unlinks will ensure completion events (normal or unlink) will - * happen ASAP */ - - if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE)) - PtlMDUnlink(msg_mdh); - - if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) - PtlMDUnlink(rdma_mdh); - - return -EAGAIN; -} -#else -int -kptllnd_tx_abort_netio(kptl_tx_t *tx) -{ - ptl_peer_t *peer = tx->tx_peer; - ptl_handle_md_t msg_mdh; - ptl_handle_md_t rdma_mdh; - unsigned long flags; - ptl_err_t prc; - - LASSERT (atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_active); - - spin_lock_irqsave(&peer->peer_lock, flags); - - msg_mdh = tx->tx_msg_mdh; - rdma_mdh = tx->tx_rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* Uncompleted comms: there must have been some error and it must be - * propagated to LNET... 
*/ - LASSERT (tx->tx_status != 0 || - (tx->tx_lnet_msg == NULL && - tx->tx_replymsg == NULL)); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE)) { - prc = PtlMDUnlink(msg_mdh); - if (prc == PTL_OK) - msg_mdh = PTL_INVALID_HANDLE; - } - - if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - prc = PtlMDUnlink(rdma_mdh); - if (prc == PTL_OK) - rdma_mdh = PTL_INVALID_HANDLE; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - - /* update tx_???_mdh if callback hasn't fired */ - if (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)) - msg_mdh = PTL_INVALID_HANDLE; - else - tx->tx_msg_mdh = msg_mdh; - - if (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)) - rdma_mdh = PTL_INVALID_HANDLE; - else - tx->tx_rdma_mdh = rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* stash the tx on its peer until it completes */ - atomic_set(&tx->tx_refcount, 1); - tx->tx_active = 1; - list_add_tail(&tx->tx_list, &peer->peer_activeq); - - kptllnd_peer_addref(peer); /* extra ref for me... */ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* This will get the watchdog thread to try aborting all the peer's - * comms again. NB, this deems it fair that 1 failing tx which can't - * be aborted immediately (i.e. its MDs are still busy) is valid cause - * to nuke everything to the same peer! 
*/ - kptllnd_peer_close(peer, tx->tx_status); - - kptllnd_peer_decref(peer); - - return -EAGAIN; -} -#endif - -void -kptllnd_tx_fini (kptl_tx_t *tx) -{ - lnet_msg_t *replymsg = tx->tx_lnet_replymsg; - lnet_msg_t *msg = tx->tx_lnet_msg; - kptl_peer_t *peer = tx->tx_peer; - int status = tx->tx_status; - int rc; - - LASSERT (!in_interrupt()); - LASSERT (atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_idle); - LASSERT (!tx->tx_active); - - /* TX has completed or failed */ - - if (peer != NULL) { - rc = kptllnd_tx_abort_netio(tx); - if (rc != 0) - return; - } - - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - - tx->tx_lnet_msg = tx->tx_lnet_replymsg = NULL; - tx->tx_peer = NULL; - tx->tx_idle = 1; - - spin_lock(&kptllnd_data.kptl_tx_lock); - list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs); - spin_unlock(&kptllnd_data.kptl_tx_lock); - - /* Must finalize AFTER freeing 'tx' */ - if (msg != NULL) - lnet_finalize(kptllnd_data.kptl_ni, msg, - (replymsg == NULL) ? 
status : 0); - - if (replymsg != NULL) - lnet_finalize(kptllnd_data.kptl_ni, replymsg, status); - - if (peer != NULL) - kptllnd_peer_decref(peer); -} - -const char * -kptllnd_tx_typestr(int type) -{ - switch (type) { - default: - return "<TYPE UNKNOWN>"; - - case TX_TYPE_SMALL_MESSAGE: - return "msg"; - - case TX_TYPE_PUT_REQUEST: - return "put_req"; - - case TX_TYPE_GET_REQUEST: - return "get_req"; - break; - - case TX_TYPE_PUT_RESPONSE: - return "put_rsp"; - break; - - case TX_TYPE_GET_RESPONSE: - return "get_rsp"; - } -} - -void -kptllnd_tx_callback(ptl_event_t *ev) -{ - kptl_eventarg_t *eva = ev->md.user_ptr; - int ismsg = (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG); - kptl_tx_t *tx = kptllnd_eventarg2obj(eva); - kptl_peer_t *peer = tx->tx_peer; - int ok = (ev->ni_fail_type == PTL_OK); - int unlinked; - unsigned long flags; - - LASSERT (peer != NULL); - LASSERT (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG || - eva->eva_type == PTLLND_EVENTARG_TYPE_RDMA); - LASSERT (!ismsg || !PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (ismsg || !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - unlinked = ev->unlinked; -#else - unlinked = (ev->type == PTL_EVENT_UNLINK); -#endif - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - kptllnd_evtype2str(ev->type), ev->type, - tx, kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - - switch (tx->tx_type) { - default: - LBUG(); - - case TX_TYPE_SMALL_MESSAGE: - LASSERT (ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - (ev->type == PTL_EVENT_ACK && tx->tx_acked)); - break; - - case TX_TYPE_PUT_REQUEST: - LASSERT (ev->type == PTL_EVENT_UNLINK || - (ismsg && ev->type == PTL_EVENT_SEND_END) || - (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) || - (!ismsg && ev->type == 
PTL_EVENT_GET_END)); - break; - - case TX_TYPE_GET_REQUEST: - LASSERT (ev->type == PTL_EVENT_UNLINK || - (ismsg && ev->type == PTL_EVENT_SEND_END) || - (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) || - (!ismsg && ev->type == PTL_EVENT_PUT_END)); - - if (!ismsg && ok && ev->type == PTL_EVENT_PUT_END) { - if (ev->hdr_data == PTLLND_RDMA_OK) { - lnet_set_reply_msg_len( - kptllnd_data.kptl_ni, - tx->tx_lnet_replymsg, - ev->mlength); - } else { - /* no match at peer */ - tx->tx_status = -EIO; - } - } - break; - - case TX_TYPE_PUT_RESPONSE: - LASSERT (!ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - ev->type == PTL_EVENT_REPLY_END); - break; - - case TX_TYPE_GET_RESPONSE: - LASSERT (!ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - (ev->type == PTL_EVENT_ACK && tx->tx_acked)); - break; - } - - if (ok) { - kptllnd_peer_alive(peer); - } else { - CERROR("Portals error to %s: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n", - libcfs_id2str(peer->peer_id), - kptllnd_evtype2str(ev->type), ev->type, - tx, kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - tx->tx_status = -EIO; - kptllnd_peer_close(peer, -EIO); - } - - if (!unlinked) - return; - - spin_lock_irqsave(&peer->peer_lock, flags); - - if (ismsg) - tx->tx_msg_mdh = PTL_INVALID_HANDLE; - else - tx->tx_rdma_mdh = PTL_INVALID_HANDLE; - - if (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) || - !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) || - !tx->tx_active) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return; - } - - list_del(&tx->tx_list); - tx->tx_active = 0; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* drop peer's ref, but if it was the last one... */ - if (atomic_dec_and_test(&tx->tx_refcount)) { - /* ...finalize it in thread context! 
*/ - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - list_add_tail(&tx->tx_list, &kptllnd_data.kptl_sched_txq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - } -} diff --git a/lnet/klnds/ptllnd/wirecheck.c b/lnet/klnds/ptllnd/wirecheck.c deleted file mode 100644 index 8111cbb3f9b7e11f404eba6cc79263ac35d3ffd1..0000000000000000000000000000000000000000 --- a/lnet/klnds/ptllnd/wirecheck.c +++ /dev/null @@ -1,206 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ -#include <stdio.h> -#include <string.h> -#include <sys/types.h> -#include <sys/wait.h> - -#include <config.h> - -#include <lnet/api-support.h> - -/* This ghastly hack to allows me to include lib-types.h It doesn't affect any - * assertions generated here (but fails-safe if it ever does) */ -typedef struct { - int counter; -} atomic_t; - -#include <lnet/lib-types.h> -#include <lnet/ptllnd_wire.h> - -#ifndef HAVE_STRNLEN -#define strnlen(s, i) strlen(s) -#endif - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#undef STRINGIFY -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" CLASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE((int)offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -system_string (char *cmdline, char *str, int len) -{ - int fds[2]; - int rc; - pid_t pid; - - rc = pipe (fds); - if (rc != 0) - abort (); - - pid = fork (); - if (pid == 0) { - /* child */ - int fd = fileno(stdout); - - rc = dup2(fds[1], fd); - if (rc != fd) - abort(); - - exit(system(cmdline)); - /* notreached */ - } else if ((int)pid < 0) { - abort(); - } else { - FILE *f = fdopen (fds[0], "r"); - - if (f == NULL) - abort(); - - close(fds[1]); - - if (fgets(str, len, f) == NULL) - abort(); - - if (waitpid(pid, &rc, 0) != pid) - abort(); - - if (!WIFEXITED(rc) || - WEXITSTATUS(rc) != 0) - abort(); - - if (strnlen(str, len) == len) - str[len - 1] = 0; - - if 
(str[strlen(str) - 1] == '\n') - str[strlen(str) - 1] = 0; - - fclose(f); - } -} - -int -main (int argc, char **argv) -{ - char unameinfo[80]; - char gccinfo[80]; - - system_string("uname -a", unameinfo, sizeof(unameinfo)); - system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); - - printf ("void kptllnd_assert_wire_constants (void)\n" - "{\n" - " /* Wire protocol assertions generated by 'wirecheck'\n" - " * running on %s\n" - " * with %s */\n" - "\n", unameinfo, gccinfo); - - BLANK_LINE (); - - COMMENT ("Constants..."); - CHECK_DEFINE (PTL_RESERVED_MATCHBITS); - CHECK_DEFINE (LNET_MSG_MATCHBITS); - - CHECK_DEFINE (PTLLND_MSG_MAGIC); - CHECK_DEFINE (PTLLND_MSG_VERSION); - - CHECK_DEFINE (PTLLND_RDMA_OK); - CHECK_DEFINE (PTLLND_RDMA_FAIL); - - CHECK_DEFINE (PTLLND_MSG_TYPE_INVALID); - CHECK_DEFINE (PTLLND_MSG_TYPE_PUT); - CHECK_DEFINE (PTLLND_MSG_TYPE_GET); - CHECK_DEFINE (PTLLND_MSG_TYPE_IMMEDIATE); - CHECK_DEFINE (PTLLND_MSG_TYPE_NOOP); - CHECK_DEFINE (PTLLND_MSG_TYPE_HELLO); - CHECK_DEFINE (PTLLND_MSG_TYPE_NAK); - - CHECK_STRUCT (kptl_msg_t); - CHECK_MEMBER (kptl_msg_t, ptlm_magic); - CHECK_MEMBER (kptl_msg_t, ptlm_version); - CHECK_MEMBER (kptl_msg_t, ptlm_type); - CHECK_MEMBER (kptl_msg_t, ptlm_credits); - CHECK_MEMBER (kptl_msg_t, ptlm_nob); - CHECK_MEMBER (kptl_msg_t, ptlm_cksum); - CHECK_MEMBER (kptl_msg_t, ptlm_srcnid); - CHECK_MEMBER (kptl_msg_t, ptlm_srcstamp); - CHECK_MEMBER (kptl_msg_t, ptlm_dstnid); - CHECK_MEMBER (kptl_msg_t, ptlm_dststamp); - CHECK_MEMBER (kptl_msg_t, ptlm_srcpid); - CHECK_MEMBER (kptl_msg_t, ptlm_dstpid); - CHECK_MEMBER (kptl_msg_t, ptlm_u.immediate); - CHECK_MEMBER (kptl_msg_t, ptlm_u.rdma); - CHECK_MEMBER (kptl_msg_t, ptlm_u.hello); - - CHECK_STRUCT (kptl_immediate_msg_t); - CHECK_MEMBER (kptl_immediate_msg_t, kptlim_hdr); - CHECK_MEMBER (kptl_immediate_msg_t, kptlim_payload[13]); - - CHECK_STRUCT (kptl_rdma_msg_t); - CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_hdr); - CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_matchbits); - - 
CHECK_STRUCT (kptl_hello_msg_t); - CHECK_MEMBER (kptl_hello_msg_t, kptlhm_matchbits); - CHECK_MEMBER (kptl_hello_msg_t, kptlhm_max_msg_size); - - printf ("}\n\n"); - - return (0); -} diff --git a/lnet/klnds/qswlnd/.cvsignore b/lnet/klnds/qswlnd/.cvsignore deleted file mode 100644 index 48b17e932f572d544111618c901df6eec08b9dcc..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.*.cmd -.tmp_versions -.depend diff --git a/lnet/klnds/qswlnd/Makefile.in b/lnet/klnds/qswlnd/Makefile.in deleted file mode 100644 index b623e029799bbe7d787c13c409952ca01b4a4086..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kqswlnd -kqswlnd-objs := qswlnd.o qswlnd_cb.o qswlnd_modparams.o - -EXTRA_POST_CFLAGS := @QSWCPPFLAGS@ -I/usr/include - -@INCLUDE_RULES@ diff --git a/lnet/klnds/qswlnd/autoMakefile.am b/lnet/klnds/qswlnd/autoMakefile.am deleted file mode 100644 index 721e86fc621e87e61f76e03a393f570fb3c2bd35..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -if MODULES -if BUILD_QSWLND -modulenet_DATA = kqswlnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kqswlnd-objs:%.o=%.c) qswlnd.h diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c deleted file mode 100644 index a8ecaca5178d2ea843ee7414e94de103259138e0..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/qswlnd.c +++ /dev/null @@ -1,552 +0,0 @@ -/* - * Copyright (C) 2002-2004 Cluster File Systems, Inc. 
- * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswlnd.h" - - -lnd_t the_kqswlnd = -{ - .lnd_type = QSWLND, - .lnd_startup = kqswnal_startup, - .lnd_shutdown = kqswnal_shutdown, - .lnd_ctl = kqswnal_ctl, - .lnd_send = kqswnal_send, - .lnd_recv = kqswnal_recv, -}; - -kqswnal_data_t kqswnal_data; - -int -kqswnal_get_tx_desc (struct libcfs_ioctl_data *data) -{ - unsigned long flags; - struct list_head *tmp; - kqswnal_tx_t *ktx; - lnet_hdr_t *hdr; - int index = data->ioc_count; - int rc = -ENOENT; - - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - list_for_each (tmp, &kqswnal_data.kqn_activetxds) { - if (index-- != 0) - continue; - - ktx = list_entry (tmp, kqswnal_tx_t, ktx_list); - hdr = (lnet_hdr_t *)ktx->ktx_buffer; - - data->ioc_count = le32_to_cpu(hdr->payload_length); - data->ioc_nid = le64_to_cpu(hdr->dest_nid); - data->ioc_u64[0] = ktx->ktx_nid; - data->ioc_u32[0] = le32_to_cpu(hdr->type); - data->ioc_u32[1] = ktx->ktx_launcher; - data->ioc_flags = (list_empty (&ktx->ktx_schedlist) ? 
0 : 1) | - (ktx->ktx_state << 2); - rc = 0; - break; - } - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - return (rc); -} - -int -kqswnal_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - - LASSERT (ni == kqswnal_data.kqn_ni); - - switch (cmd) { - case IOC_LIBCFS_GET_TXDESC: - return (kqswnal_get_tx_desc (data)); - - case IOC_LIBCFS_REGISTER_MYNID: - if (data->ioc_nid == ni->ni_nid) - return 0; - - LASSERT (LNET_NIDNET(data->ioc_nid) == LNET_NIDNET(ni->ni_nid)); - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID for %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return 0; - - default: - return (-EINVAL); - } -} - -void -kqswnal_shutdown(lnet_ni_t *ni) -{ - unsigned long flags; - kqswnal_tx_t *ktx; - kqswnal_rx_t *krx; - - CDEBUG (D_NET, "shutdown\n"); - LASSERT (ni->ni_data == &kqswnal_data); - LASSERT (ni == kqswnal_data.kqn_ni); - - switch (kqswnal_data.kqn_init) - { - default: - LASSERT (0); - - case KQN_INIT_ALL: - case KQN_INIT_DATA: - break; - } - - /**********************************************************************/ - /* Signal the start of shutdown... */ - spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags); - kqswnal_data.kqn_shuttingdown = 1; - spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags); - - /**********************************************************************/ - /* wait for sends that have allocated a tx desc to launch or give up */ - while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) { - CDEBUG(D_NET, "waiting for %d pending sends\n", - atomic_read (&kqswnal_data.kqn_pending_txs)); - cfs_pause(cfs_time_seconds(1)); - } - - /**********************************************************************/ - /* close elan comms */ - /* Shut down receivers first; rx callbacks might try sending... 
*/ - if (kqswnal_data.kqn_eprx_small != NULL) - ep_free_rcvr (kqswnal_data.kqn_eprx_small); - - if (kqswnal_data.kqn_eprx_large != NULL) - ep_free_rcvr (kqswnal_data.kqn_eprx_large); - - /* NB ep_free_rcvr() returns only after we've freed off all receive - * buffers (see shutdown handling in kqswnal_requeue_rx()). This - * means we must have completed any messages we passed to - * lnet_parse() */ - - if (kqswnal_data.kqn_eptx != NULL) - ep_free_xmtr (kqswnal_data.kqn_eptx); - - /* NB ep_free_xmtr() returns only after all outstanding transmits - * have called their callback... */ - LASSERT(list_empty(&kqswnal_data.kqn_activetxds)); - - /**********************************************************************/ - /* flag threads to terminate, wake them and wait for them to die */ - kqswnal_data.kqn_shuttingdown = 2; - wake_up_all (&kqswnal_data.kqn_sched_waitq); - - while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) { - CDEBUG(D_NET, "waiting for %d threads to terminate\n", - atomic_read (&kqswnal_data.kqn_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - /**********************************************************************/ - /* No more threads. No more portals, router or comms callbacks! - * I control the horizontals and the verticals... - */ - - LASSERT (list_empty (&kqswnal_data.kqn_readyrxds)); - LASSERT (list_empty (&kqswnal_data.kqn_donetxds)); - LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds)); - - /**********************************************************************/ - /* Unmap message buffers and free all descriptors and buffers - */ - - /* FTTB, we need to unmap any remaining mapped memory. When - * ep_dvma_release() get fixed (and releases any mappings in the - * region), we can delete all the code from here --------> */ - - for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx = ktx->ktx_alloclist) { - /* If ktx has a buffer, it got mapped; unmap now. 
NB only - * the pre-mapped stuff is still mapped since all tx descs - * must be idle */ - - if (ktx->ktx_buffer != NULL) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_tx_nmh, - &ktx->ktx_ebuffer); - } - - for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) { - /* If krx_kiov[0].kiov_page got allocated, it got mapped. - * NB subsequent pages get merged */ - - if (krx->krx_kiov[0].kiov_page != NULL) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_rx_nmh, - &krx->krx_elanbuffer); - } - /* <----------- to here */ - - if (kqswnal_data.kqn_ep_rx_nmh != NULL) - ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh); - - if (kqswnal_data.kqn_ep_tx_nmh != NULL) - ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh); - - while (kqswnal_data.kqn_txds != NULL) { - ktx = kqswnal_data.kqn_txds; - - if (ktx->ktx_buffer != NULL) - LIBCFS_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); - - kqswnal_data.kqn_txds = ktx->ktx_alloclist; - LIBCFS_FREE(ktx, sizeof(*ktx)); - } - - while (kqswnal_data.kqn_rxds != NULL) { - int i; - - krx = kqswnal_data.kqn_rxds; - for (i = 0; i < krx->krx_npages; i++) - if (krx->krx_kiov[i].kiov_page != NULL) - __free_page (krx->krx_kiov[i].kiov_page); - - kqswnal_data.kqn_rxds = krx->krx_alloclist; - LIBCFS_FREE(krx, sizeof (*krx)); - } - - /* resets flags, pointers to NULL etc */ - memset(&kqswnal_data, 0, sizeof (kqswnal_data)); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&libcfs_kmemory)); - - PORTAL_MODULE_UNUSE; -} - -int -kqswnal_startup (lnet_ni_t *ni) -{ - EP_RAILMASK all_rails = EP_RAILMASK_ALL; - int rc; - int i; - kqswnal_rx_t *krx; - kqswnal_tx_t *ktx; - int elan_page_idx; - - LASSERT (ni->ni_lnd == &the_kqswlnd); - -#if KQSW_CKSUM - if (the_lnet.ln_ptlcompat != 0) { - CERROR("Checksumming version not portals compatible\n"); - return -ENODEV; - } -#endif - /* Only 1 instance supported */ - if (kqswnal_data.kqn_init != KQN_INIT_NOTHING) { - CERROR ("Only 1 instance 
supported\n"); - return -EPERM; - } - - if (ni->ni_interfaces[0] != NULL) { - CERROR("Explicit interface config not supported\n"); - return -EPERM; - } - - if (*kqswnal_tunables.kqn_credits >= - *kqswnal_tunables.kqn_ntxmsgs) { - LCONSOLE_ERROR("Configuration error: please set " - "ntxmsgs(%d) > credits(%d)\n", - *kqswnal_tunables.kqn_ntxmsgs, - *kqswnal_tunables.kqn_credits); - } - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&libcfs_kmemory)); - - /* ensure all pointers NULL etc */ - memset (&kqswnal_data, 0, sizeof (kqswnal_data)); - - kqswnal_data.kqn_ni = ni; - ni->ni_data = &kqswnal_data; - ni->ni_peertxcredits = *kqswnal_tunables.kqn_peercredits; - ni->ni_maxtxcredits = *kqswnal_tunables.kqn_credits; - - INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds); - spin_lock_init (&kqswnal_data.kqn_idletxd_lock); - - INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_donetxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds); - - spin_lock_init (&kqswnal_data.kqn_sched_lock); - init_waitqueue_head (&kqswnal_data.kqn_sched_waitq); - - /* pointers/lists/locks initialised */ - kqswnal_data.kqn_init = KQN_INIT_DATA; - PORTAL_MODULE_USE; - - kqswnal_data.kqn_ep = ep_system(); - if (kqswnal_data.kqn_ep == NULL) { - CERROR("Can't initialise EKC\n"); - kqswnal_shutdown(ni); - return (-ENODEV); - } - - if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) { - CERROR("Can't get elan ID\n"); - kqswnal_shutdown(ni); - return (-ENODEV); - } - - kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_ep); - kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_ep); - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), kqswnal_data.kqn_elanid); - - /**********************************************************************/ - /* Get the transmitter */ - - kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep); - if (kqswnal_data.kqn_eptx == NULL) - { - CERROR ("Can't allocate 
transmitter\n"); - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Get the receivers */ - - kqswnal_data.kqn_eprx_small = - ep_alloc_rcvr (kqswnal_data.kqn_ep, - EP_MSG_SVC_PORTALS_SMALL, - *kqswnal_tunables.kqn_ep_envelopes_small); - if (kqswnal_data.kqn_eprx_small == NULL) - { - CERROR ("Can't install small msg receiver\n"); - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - kqswnal_data.kqn_eprx_large = - ep_alloc_rcvr (kqswnal_data.kqn_ep, - EP_MSG_SVC_PORTALS_LARGE, - *kqswnal_tunables.kqn_ep_envelopes_large); - if (kqswnal_data.kqn_eprx_large == NULL) - { - CERROR ("Can't install large msg receiver\n"); - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Reserve Elan address space for transmit descriptors NB we may - * either send the contents of associated buffers immediately, or - * map them for the peer to suck/blow... 
*/ - kqswnal_data.kqn_ep_tx_nmh = - ep_dvma_reserve(kqswnal_data.kqn_ep, - KQSW_NTXMSGPAGES*(*kqswnal_tunables.kqn_ntxmsgs), - EP_PERM_WRITE); - if (kqswnal_data.kqn_ep_tx_nmh == NULL) { - CERROR("Can't reserve tx dma space\n"); - kqswnal_shutdown(ni); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Reserve Elan address space for receive buffers */ - kqswnal_data.kqn_ep_rx_nmh = - ep_dvma_reserve(kqswnal_data.kqn_ep, - KQSW_NRXMSGPAGES_SMALL * - (*kqswnal_tunables.kqn_nrxmsgs_small) + - KQSW_NRXMSGPAGES_LARGE * - (*kqswnal_tunables.kqn_nrxmsgs_large), - EP_PERM_WRITE); - if (kqswnal_data.kqn_ep_tx_nmh == NULL) { - CERROR("Can't reserve rx dma space\n"); - kqswnal_shutdown(ni); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Allocate/Initialise transmit descriptors */ - - kqswnal_data.kqn_txds = NULL; - for (i = 0; i < (*kqswnal_tunables.kqn_ntxmsgs); i++) - { - int premapped_pages; - int basepage = i * KQSW_NTXMSGPAGES; - - LIBCFS_ALLOC (ktx, sizeof(*ktx)); - if (ktx == NULL) { - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - memset(ktx, 0, sizeof(*ktx)); /* NULL pointers; zero flags */ - ktx->ktx_alloclist = kqswnal_data.kqn_txds; - kqswnal_data.kqn_txds = ktx; - - LIBCFS_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); - if (ktx->ktx_buffer == NULL) - { - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - /* Map pre-allocated buffer NOW, to save latency on transmit */ - premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer, - KQSW_TX_BUFFER_SIZE); - ep_dvma_load(kqswnal_data.kqn_ep, NULL, - ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE, - kqswnal_data.kqn_ep_tx_nmh, basepage, - &all_rails, &ktx->ktx_ebuffer); - - ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */ - ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */ - - INIT_LIST_HEAD (&ktx->ktx_schedlist); - - ktx->ktx_state = KTX_IDLE; - 
ktx->ktx_rail = -1; /* unset rail */ - - list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_idletxds); - } - - /**********************************************************************/ - /* Allocate/Initialise receive descriptors */ - kqswnal_data.kqn_rxds = NULL; - elan_page_idx = 0; - for (i = 0; i < *kqswnal_tunables.kqn_nrxmsgs_small + *kqswnal_tunables.kqn_nrxmsgs_large; i++) - { - EP_NMD elanbuffer; - int j; - - LIBCFS_ALLOC(krx, sizeof(*krx)); - if (krx == NULL) { - kqswnal_shutdown(ni); - return (-ENOMEM); - } - - memset(krx, 0, sizeof(*krx)); /* clear flags, null pointers etc */ - krx->krx_alloclist = kqswnal_data.kqn_rxds; - kqswnal_data.kqn_rxds = krx; - - if (i < *kqswnal_tunables.kqn_nrxmsgs_small) - { - krx->krx_npages = KQSW_NRXMSGPAGES_SMALL; - krx->krx_eprx = kqswnal_data.kqn_eprx_small; - } - else - { - krx->krx_npages = KQSW_NRXMSGPAGES_LARGE; - krx->krx_eprx = kqswnal_data.kqn_eprx_large; - } - - LASSERT (krx->krx_npages > 0); - for (j = 0; j < krx->krx_npages; j++) - { - struct page *page = alloc_page(GFP_KERNEL); - - if (page == NULL) { - kqswnal_shutdown (ni); - return (-ENOMEM); - } - - krx->krx_kiov[j] = (lnet_kiov_t) {.kiov_page = page, - .kiov_offset = 0, - .kiov_len = PAGE_SIZE}; - LASSERT(page_address(page) != NULL); - - ep_dvma_load(kqswnal_data.kqn_ep, NULL, - page_address(page), - PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh, - elan_page_idx, &all_rails, &elanbuffer); - - if (j == 0) { - krx->krx_elanbuffer = elanbuffer; - } else { - rc = ep_nmd_merge(&krx->krx_elanbuffer, - &krx->krx_elanbuffer, - &elanbuffer); - /* NB contiguous mapping */ - LASSERT(rc); - } - elan_page_idx++; - - } - } - LASSERT (elan_page_idx == - (*kqswnal_tunables.kqn_nrxmsgs_small * KQSW_NRXMSGPAGES_SMALL) + - (*kqswnal_tunables.kqn_nrxmsgs_large * KQSW_NRXMSGPAGES_LARGE)); - - /**********************************************************************/ - /* Queue receives, now that it's OK to run their completion callbacks */ - - for (krx = kqswnal_data.kqn_rxds; krx != 
NULL; krx = krx->krx_alloclist) { - /* NB this enqueue can allocate/sleep (attr == 0) */ - krx->krx_state = KRX_POSTED; - rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx, - &krx->krx_elanbuffer, 0); - if (rc != EP_SUCCESS) { - CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_shutdown (ni); - return (-EIO); - } - } - - /**********************************************************************/ - /* Spawn scheduling threads */ - for (i = 0; i < num_online_cpus(); i++) { - rc = kqswnal_thread_start (kqswnal_scheduler, NULL); - if (rc != 0) - { - CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_shutdown (ni); - return (-ESRCH); - } - } - - kqswnal_data.kqn_init = KQN_INIT_ALL; - return (0); -} - -void __exit -kqswnal_finalise (void) -{ - lnet_unregister_lnd(&the_kqswlnd); - kqswnal_tunables_fini(); -} - -static int __init -kqswnal_initialise (void) -{ - int rc = kqswnal_tunables_init(); - - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kqswlnd); - return (0); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel Quadrics/Elan LND v1.01"); -MODULE_LICENSE("GPL"); - -module_init (kqswnal_initialise); -module_exit (kqswnal_finalise); diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h deleted file mode 100644 index 0fe2a5eab76dc139a9402d2f5f2dcd9c660183ac..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/qswlnd.h +++ /dev/null @@ -1,350 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef _QSWNAL_H -#define _QSWNAL_H -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include <qsnet/kernel.h> -#undef printf /* nasty QSW #define */ - -#include <linux/config.h> -#include <linux/module.h> - -#include <elan/epcomms.h> - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/locks.h> /* wait_on_buffer */ -#else -#include <linux/buffer_head.h> /* wait_on_buffer */ -#endif -#include <linux/unistd.h> -#include <net/sock.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/sysctl.h> -#include <asm/segment.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -/* fixed constants */ -#define KQSW_SMALLMSG (4<<10) /* small/large ep receiver breakpoint */ -#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ - -#define KQSW_CKSUM 0 /* enable checksumming (protocol incompatible) */ - -/* - * derived constants - */ - -#define KQSW_TX_BUFFER_SIZE (offsetof(kqswnal_msg_t, \ - kqm_u.immediate.kqim_payload[*kqswnal_tunables.kqn_tx_maxcontig])) -/* The pre-allocated tx buffer (hdr + small payload) */ - -#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(LNET_MAX_PAYLOAD) + 1) -/* Reserve elan address 
space for pre-allocated and pre-mapped transmit - * buffer and a full payload too. Extra pages allow for page alignment */ - -#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_SMALLMSG)) -/* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) - -#define KQSW_NRXMSGPAGES_LARGE (btopr(sizeof(lnet_msg_t) + LNET_MAX_PAYLOAD)) -/* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) -/* biggest complete packet we can receive (or transmit) */ - -/* Wire messages */ -/* Remote memory descriptor */ -typedef struct -{ - __u32 kqrmd_nfrag; /* # frags */ - EP_NMD kqrmd_frag[0]; /* actual frags */ -} kqswnal_remotemd_t; - -/* Immediate data */ -typedef struct -{ - lnet_hdr_t kqim_hdr; /* LNET header */ - char kqim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kqswnal_immediate_msg_t; - -/* RDMA request */ -typedef struct -{ - lnet_hdr_t kqrm_hdr; /* LNET header */ - kqswnal_remotemd_t kqrm_rmd; /* peer's buffer */ -} WIRE_ATTR kqswnal_rdma_msg_t; - -typedef struct -{ - __u32 kqm_magic; /* I'm a qswlnd message */ - __u16 kqm_version; /* this is my version number */ - __u16 kqm_type; /* msg type */ -#if KQSW_CKSUM - __u32 kqm_cksum; /* crc32 checksum */ - __u32 kqm_nob; /* original msg length */ -#endif - union { - kqswnal_immediate_msg_t immediate; - kqswnal_rdma_msg_t rdma; - } WIRE_ATTR kqm_u; -} WIRE_ATTR kqswnal_msg_t; - -#if KQSW_CKSUM /* enable checksums ? 
*/ -# include <linux/crc32.h> -static inline __u32 kqswnal_csum(__u32 crc, unsigned char const *p, size_t len) -{ -#if 1 - return crc32_le(crc, p, len); -#else - while (len-- > 0) - crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ; - return crc; -#endif -} -# define QSWLND_PROTO_VERSION 0xbeef -#else -# define QSWLND_PROTO_VERSION 1 -#endif - -#define QSWLND_MSG_IMMEDIATE 0 -#define QSWLND_MSG_RDMA 1 - -typedef union { - EP_STATUSBLK ep_statusblk; - struct { - __u32 status; - __u32 magic; - __u32 version; - union { - struct { - __u32 len; - __u32 cksum; - } WIRE_ATTR get; - } WIRE_ATTR u; - } WIRE_ATTR msg; -} kqswnal_rpc_reply_t; - -typedef struct kqswnal_rx -{ - struct list_head krx_list; /* enqueue -> thread */ - struct kqswnal_rx *krx_alloclist; /* stack in kqn_rxds */ - EP_RCVR *krx_eprx; /* port to post receives to */ - EP_RXD *krx_rxd; /* receive descriptor (for repost) */ - EP_NMD krx_elanbuffer; /* contiguous Elan buffer */ - int krx_npages; /* # pages in receive buffer */ - int krx_nob; /* Number Of Bytes received into buffer */ - int krx_rpc_reply_needed:1; /* peer waiting for EKC RPC reply */ - int krx_raw_lnet_hdr:1; /* msg is a raw lnet hdr (portals compatible) */ - int krx_state; /* what this RX is doing */ - atomic_t krx_refcount; /* how to tell when rpc is done */ -#if KQSW_CKSUM - __u32 krx_cksum; /* checksum */ -#endif - kqswnal_rpc_reply_t krx_rpc_reply; /* rpc reply status block */ - lnet_kiov_t krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags */ -} kqswnal_rx_t; - -#define KRX_POSTED 1 /* receiving */ -#define KRX_PARSE 2 /* ready to be parsed */ -#define KRX_COMPLETING 3 /* waiting to be completed */ - - -typedef struct kqswnal_tx -{ - struct list_head ktx_list; /* enqueue idle/active */ - struct list_head ktx_schedlist; /* enqueue on scheduler */ - struct kqswnal_tx *ktx_alloclist; /* stack in kqn_txds */ - unsigned int ktx_state:7; /* What I'm doing */ - unsigned int ktx_firsttmpfrag:1; /* ktx_frags[0] is in my ebuffer ? 
0 : 1 */ - uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */ - int ktx_npages; /* pages reserved for mapping messages */ - int ktx_nmappedpages; /* # pages mapped for current message */ - int ktx_port; /* destination ep port */ - lnet_nid_t ktx_nid; /* destination node */ - void *ktx_args[3]; /* completion passthru */ - char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */ - unsigned long ktx_launchtime; /* when (in jiffies) the transmit was launched */ - int ktx_status; /* completion status */ -#if KQSW_CKSUM - __u32 ktx_cksum; /* optimized GET payload checksum */ -#endif - /* debug/info fields */ - pid_t ktx_launcher; /* pid of launching process */ - - int ktx_nfrag; /* # message frags */ - int ktx_rail; /* preferred rail */ - EP_NMD ktx_ebuffer; /* elan mapping of ktx_buffer */ - EP_NMD ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */ -} kqswnal_tx_t; - -#define KTX_IDLE 0 /* on kqn_idletxds */ -#define KTX_SENDING 1 /* normal send */ -#define KTX_GETTING 2 /* sending optimised get */ -#define KTX_PUTTING 3 /* sending optimised put */ -#define KTX_RDMA_FETCH 4 /* handling optimised put */ -#define KTX_RDMA_STORE 5 /* handling optimised get */ - -typedef struct -{ - int *kqn_tx_maxcontig; /* maximum payload to defrag */ - int *kqn_ntxmsgs; /* # normal tx msgs */ - int *kqn_credits; /* # concurrent sends */ - int *kqn_peercredits; /* # concurrent sends to 1 peer */ - int *kqn_nrxmsgs_large; /* # 'large' rx msgs */ - int *kqn_ep_envelopes_large; /* # 'large' rx ep envelopes */ - int *kqn_nrxmsgs_small; /* # 'small' rx msgs */ - int *kqn_ep_envelopes_small; /* # 'small' rx ep envelopes */ - int *kqn_optimized_puts; /* optimized PUTs? */ - int *kqn_optimized_gets; /* optimized GETs? 
*/ -#if KQSW_CKSUM - int *kqn_inject_csum_error; /* # csum errors to inject */ -#endif - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *kqn_sysctl; /* sysctl interface */ -#endif -} kqswnal_tunables_t; - -typedef struct -{ - char kqn_init; /* what's been initialised */ - char kqn_shuttingdown; /* I'm trying to shut down */ - atomic_t kqn_nthreads; /* # threads running */ - lnet_ni_t *kqn_ni; /* _the_ instance of me */ - - kqswnal_rx_t *kqn_rxds; /* stack of all the receive descriptors */ - kqswnal_tx_t *kqn_txds; /* stack of all the transmit descriptors */ - - struct list_head kqn_idletxds; /* transmit descriptors free to use */ - struct list_head kqn_activetxds; /* transmit descriptors being used */ - spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ - atomic_t kqn_pending_txs; /* # transmits being prepped */ - - spinlock_t kqn_sched_lock; /* serialise packet schedulers */ - wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */ - - struct list_head kqn_readyrxds; /* rxds full of data */ - struct list_head kqn_donetxds; /* completed transmits */ - struct list_head kqn_delayedtxds; /* delayed transmits */ - - EP_SYS *kqn_ep; /* elan system */ - EP_NMH *kqn_ep_tx_nmh; /* elan reserved tx vaddrs */ - EP_NMH *kqn_ep_rx_nmh; /* elan reserved rx vaddrs */ - EP_XMTR *kqn_eptx; /* elan transmitter */ - EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ - EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */ - - int kqn_nnodes; /* this cluster's size */ - int kqn_elanid; /* this nodes's elan ID */ - - EP_STATUSBLK kqn_rpc_success; /* preset RPC reply status blocks */ - EP_STATUSBLK kqn_rpc_failed; - EP_STATUSBLK kqn_rpc_version; /* reply to future version query */ - EP_STATUSBLK kqn_rpc_magic; /* reply to future version query */ -} kqswnal_data_t; - -/* kqn_init state */ -#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ -#define KQN_INIT_DATA 1 -#define KQN_INIT_ALL 2 - -extern 
kqswnal_tunables_t kqswnal_tunables; -extern kqswnal_data_t kqswnal_data; - -extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg); -extern void kqswnal_rxhandler(EP_RXD *rxd); -extern int kqswnal_scheduler (void *); -extern void kqswnal_rx_done (kqswnal_rx_t *krx); - -static inline lnet_nid_t -kqswnal_elanid2nid (int elanid) -{ - return LNET_MKNID(LNET_NIDNET(kqswnal_data.kqn_ni->ni_nid), elanid); -} - -static inline int -kqswnal_nid2elanid (lnet_nid_t nid) -{ - __u32 elanid = LNET_NIDADDR(nid); - - /* not in this cluster? */ - return (elanid >= kqswnal_data.kqn_nnodes) ? -1 : elanid; -} - -static inline lnet_nid_t -kqswnal_rx_nid(kqswnal_rx_t *krx) -{ - return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd))); -} - -static inline int -kqswnal_pages_spanned (void *base, int nob) -{ - unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT; - unsigned long last_page = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT; - - LASSERT (last_page >= first_page); /* can't wrap address space */ - return (last_page - first_page + 1); -} - -static inline void kqswnal_rx_decref (kqswnal_rx_t *krx) -{ - LASSERT (atomic_read (&krx->krx_refcount) > 0); - if (atomic_dec_and_test (&krx->krx_refcount)) - kqswnal_rx_done(krx); -} - -int kqswnal_startup (lnet_ni_t *ni); -void kqswnal_shutdown (lnet_ni_t *ni); -int kqswnal_ctl (lnet_ni_t *ni, unsigned int cmd, void *arg); -int kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kqswnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); - -int kqswnal_tunables_init(void); -void kqswnal_tunables_fini(void); - -#endif /* _QSWNAL_H */ diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c deleted file mode 100644 index 86a1f8fb3e3a87a96b646b9ab6f9d1a9434e1797..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ 
/dev/null @@ -1,1832 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswlnd.h" - -void -kqswnal_notify_peer_down(kqswnal_tx_t *ktx) -{ - struct timeval now; - time_t then; - - do_gettimeofday (&now); - then = now.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ; - - lnet_notify(kqswnal_data.kqn_ni, ktx->ktx_nid, 0, then); -} - -void -kqswnal_unmap_tx (kqswnal_tx_t *ktx) -{ - int i; - - ktx->ktx_rail = -1; /* unset rail */ - - if (ktx->ktx_nmappedpages == 0) - return; - - CDEBUG(D_NET, "%p unloading %d frags starting at %d\n", - ktx, ktx->ktx_nfrag, ktx->ktx_firsttmpfrag); - - for (i = ktx->ktx_firsttmpfrag; i < ktx->ktx_nfrag; i++) - ep_dvma_unload(kqswnal_data.kqn_ep, - kqswnal_data.kqn_ep_tx_nmh, - &ktx->ktx_frags[i]); - - ktx->ktx_nmappedpages = 0; -} - -int -kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob, - unsigned int niov, lnet_kiov_t *kiov) -{ - int nfrags = ktx->ktx_nfrag; - int nmapped = ktx->ktx_nmappedpages; - int maxmapped = ktx->ktx_npages; - uint32_t basepage = ktx->ktx_basepage + nmapped; - char *ptr; - - EP_RAILMASK railmask; - int rail; - - if (ktx->ktx_rail < 0) - 
ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx, - EP_RAILMASK_ALL, - kqswnal_nid2elanid(ktx->ktx_nid)); - rail = ktx->ktx_rail; - if (rail < 0) { - CERROR("No rails available for %s\n", libcfs_nid2str(ktx->ktx_nid)); - return (-ENETDOWN); - } - railmask = 1 << rail; - - LASSERT (nmapped <= maxmapped); - LASSERT (nfrags >= ktx->ktx_firsttmpfrag); - LASSERT (nfrags <= EP_MAXFRAG); - LASSERT (niov > 0); - LASSERT (nob > 0); - - /* skip complete frags before 'offset' */ - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - kiov++; - niov--; - LASSERT (niov > 0); - } - - do { - int fraglen = kiov->kiov_len - offset; - - /* each page frag is contained in one page */ - LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE); - - if (fraglen > nob) - fraglen = nob; - - nmapped++; - if (nmapped > maxmapped) { - CERROR("Can't map message in %d pages (max %d)\n", - nmapped, maxmapped); - return (-EMSGSIZE); - } - - if (nfrags == EP_MAXFRAG) { - CERROR("Message too fragmented in Elan VM (max %d frags)\n", - EP_MAXFRAG); - return (-EMSGSIZE); - } - - /* XXX this is really crap, but we'll have to kmap until - * EKC has a page (rather than vaddr) mapping interface */ - - ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; - - CDEBUG(D_NET, - "%p[%d] loading %p for %d, page %d, %d total\n", - ktx, nfrags, ptr, fraglen, basepage, nmapped); - - ep_dvma_load(kqswnal_data.kqn_ep, NULL, - ptr, fraglen, - kqswnal_data.kqn_ep_tx_nmh, basepage, - &railmask, &ktx->ktx_frags[nfrags]); - - if (nfrags == ktx->ktx_firsttmpfrag || - !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1], - &ktx->ktx_frags[nfrags - 1], - &ktx->ktx_frags[nfrags])) { - /* new frag if this is the first or can't merge */ - nfrags++; - } - - kunmap (kiov->kiov_page); - - /* keep in loop for failure case */ - ktx->ktx_nmappedpages = nmapped; - - basepage++; - kiov++; - niov--; - nob -= fraglen; - offset = 0; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); 
- - } while (nob > 0); - - ktx->ktx_nfrag = nfrags; - CDEBUG (D_NET, "%p got %d frags over %d pages\n", - ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages); - - return (0); -} - -#if KQSW_CKSUM -__u32 -kqswnal_csum_kiov (__u32 csum, int offset, int nob, - unsigned int niov, lnet_kiov_t *kiov) -{ - char *ptr; - - if (nob == 0) - return csum; - - LASSERT (niov > 0); - LASSERT (nob > 0); - - /* skip complete frags before 'offset' */ - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - kiov++; - niov--; - LASSERT (niov > 0); - } - - do { - int fraglen = kiov->kiov_len - offset; - - /* each page frag is contained in one page */ - LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE); - - if (fraglen > nob) - fraglen = nob; - - ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; - - csum = kqswnal_csum(csum, ptr, fraglen); - - kunmap (kiov->kiov_page); - - kiov++; - niov--; - nob -= fraglen; - offset = 0; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob > 0); - - return csum; -} -#endif - -int -kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob, - unsigned int niov, struct iovec *iov) -{ - int nfrags = ktx->ktx_nfrag; - int nmapped = ktx->ktx_nmappedpages; - int maxmapped = ktx->ktx_npages; - uint32_t basepage = ktx->ktx_basepage + nmapped; - - EP_RAILMASK railmask; - int rail; - - if (ktx->ktx_rail < 0) - ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx, - EP_RAILMASK_ALL, - kqswnal_nid2elanid(ktx->ktx_nid)); - rail = ktx->ktx_rail; - if (rail < 0) { - CERROR("No rails available for %s\n", libcfs_nid2str(ktx->ktx_nid)); - return (-ENETDOWN); - } - railmask = 1 << rail; - - LASSERT (nmapped <= maxmapped); - LASSERT (nfrags >= ktx->ktx_firsttmpfrag); - LASSERT (nfrags <= EP_MAXFRAG); - LASSERT (niov > 0); - LASSERT (nob > 0); - - /* skip complete frags before offset */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - - 
do { - int fraglen = iov->iov_len - offset; - long npages; - - if (fraglen > nob) - fraglen = nob; - npages = kqswnal_pages_spanned (iov->iov_base, fraglen); - - nmapped += npages; - if (nmapped > maxmapped) { - CERROR("Can't map message in %d pages (max %d)\n", - nmapped, maxmapped); - return (-EMSGSIZE); - } - - if (nfrags == EP_MAXFRAG) { - CERROR("Message too fragmented in Elan VM (max %d frags)\n", - EP_MAXFRAG); - return (-EMSGSIZE); - } - - CDEBUG(D_NET, - "%p[%d] loading %p for %d, pages %d for %ld, %d total\n", - ktx, nfrags, iov->iov_base + offset, fraglen, - basepage, npages, nmapped); - - ep_dvma_load(kqswnal_data.kqn_ep, NULL, - iov->iov_base + offset, fraglen, - kqswnal_data.kqn_ep_tx_nmh, basepage, - &railmask, &ktx->ktx_frags[nfrags]); - - if (nfrags == ktx->ktx_firsttmpfrag || - !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1], - &ktx->ktx_frags[nfrags - 1], - &ktx->ktx_frags[nfrags])) { - /* new frag if this is the first or can't merge */ - nfrags++; - } - - /* keep in loop for failure case */ - ktx->ktx_nmappedpages = nmapped; - - basepage += npages; - iov++; - niov--; - nob -= fraglen; - offset = 0; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob > 0); - - ktx->ktx_nfrag = nfrags; - CDEBUG (D_NET, "%p got %d frags over %d pages\n", - ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages); - - return (0); -} - -#if KQSW_CKSUM -__u32 -kqswnal_csum_iov (__u32 csum, int offset, int nob, - unsigned int niov, struct iovec *iov) -{ - if (nob == 0) - return csum; - - LASSERT (niov > 0); - LASSERT (nob > 0); - - /* skip complete frags before offset */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - - do { - int fraglen = iov->iov_len - offset; - - if (fraglen > nob) - fraglen = nob; - - csum = kqswnal_csum(csum, iov->iov_base + offset, fraglen); - - iov++; - niov--; - nob -= fraglen; - offset = 0; - - /* iov must not run out before end of data */ - LASSERT (nob 
== 0 || niov > 0); - - } while (nob > 0); - - return csum; -} -#endif - -void -kqswnal_put_idle_tx (kqswnal_tx_t *ktx) -{ - unsigned long flags; - - kqswnal_unmap_tx (ktx); /* release temporary mappings */ - ktx->ktx_state = KTX_IDLE; - - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - list_del (&ktx->ktx_list); /* take off active list */ - list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds); - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); -} - -kqswnal_tx_t * -kqswnal_get_idle_tx (void) -{ - unsigned long flags; - kqswnal_tx_t *ktx; - - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - if (kqswnal_data.kqn_shuttingdown || - list_empty (&kqswnal_data.kqn_idletxds)) { - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - return NULL; - } - - ktx = list_entry (kqswnal_data.kqn_idletxds.next, kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); - - list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds); - ktx->ktx_launcher = current->pid; - atomic_inc(&kqswnal_data.kqn_pending_txs); - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */ - LASSERT (ktx->ktx_nmappedpages == 0); - return (ktx); -} - -void -kqswnal_tx_done_in_thread_context (kqswnal_tx_t *ktx) -{ - lnet_msg_t *lnetmsg0 = NULL; - lnet_msg_t *lnetmsg1 = NULL; - int status0 = 0; - int status1 = 0; - kqswnal_rx_t *krx; - - LASSERT (!in_interrupt()); - - if (ktx->ktx_status == -EHOSTDOWN) - kqswnal_notify_peer_down(ktx); - - switch (ktx->ktx_state) { - case KTX_RDMA_FETCH: /* optimized PUT/REPLY handled */ - krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1]; - status0 = ktx->ktx_status; -#if KQSW_CKSUM - if (status0 == 0) { /* RDMA succeeded */ - kqswnal_msg_t *msg; - __u32 csum; - - msg = (kqswnal_msg_t *) - page_address(krx->krx_kiov[0].kiov_page); - - csum = (lnetmsg0->msg_kiov != NULL) ? 
- kqswnal_csum_kiov(krx->krx_cksum, - lnetmsg0->msg_offset, - lnetmsg0->msg_wanted, - lnetmsg0->msg_niov, - lnetmsg0->msg_kiov) : - kqswnal_csum_iov(krx->krx_cksum, - lnetmsg0->msg_offset, - lnetmsg0->msg_wanted, - lnetmsg0->msg_niov, - lnetmsg0->msg_iov); - - /* Can only check csum if I got it all */ - if (lnetmsg0->msg_wanted == lnetmsg0->msg_len && - csum != msg->kqm_cksum) { - ktx->ktx_status = -EIO; - krx->krx_rpc_reply.msg.status = -EIO; - CERROR("RDMA checksum failed %u(%u) from %s\n", - csum, msg->kqm_cksum, - libcfs_nid2str(kqswnal_rx_nid(krx))); - } - } -#endif - LASSERT (krx->krx_state == KRX_COMPLETING); - kqswnal_rx_decref (krx); - break; - - case KTX_RDMA_STORE: /* optimized GET handled */ - case KTX_PUTTING: /* optimized PUT sent */ - case KTX_SENDING: /* normal send */ - lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1]; - status0 = ktx->ktx_status; - break; - - case KTX_GETTING: /* optimized GET sent & payload received */ - /* Complete the GET with success since we can't avoid - * delivering a REPLY event; we committed to it when we - * launched the GET */ - lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1]; - status0 = 0; - lnetmsg1 = (lnet_msg_t *)ktx->ktx_args[2]; - status1 = ktx->ktx_status; -#if KQSW_CKSUM - if (status1 == 0) { /* RDMA succeeded */ - lnet_msg_t *lnetmsg0 = (lnet_msg_t *)ktx->ktx_args[1]; - lnet_libmd_t *md = lnetmsg0->msg_md; - __u32 csum; - - csum = ((md->md_options & LNET_MD_KIOV) != 0) ? 
- kqswnal_csum_kiov(~0, 0, - md->md_length, - md->md_niov, - md->md_iov.kiov) : - kqswnal_csum_iov(~0, 0, - md->md_length, - md->md_niov, - md->md_iov.iov); - - if (csum != ktx->ktx_cksum) { - CERROR("RDMA checksum failed %u(%u) from %s\n", - csum, ktx->ktx_cksum, - libcfs_nid2str(ktx->ktx_nid)); - status1 = -EIO; - } - } -#endif - break; - - default: - LASSERT (0); - } - - kqswnal_put_idle_tx (ktx); - - lnet_finalize (kqswnal_data.kqn_ni, lnetmsg0, status0); - if (lnetmsg1 != NULL) - lnet_finalize (kqswnal_data.kqn_ni, lnetmsg1, status1); -} - -void -kqswnal_tx_done (kqswnal_tx_t *ktx, int status) -{ - unsigned long flags; - - ktx->ktx_status = status; - - if (!in_interrupt()) { - kqswnal_tx_done_in_thread_context(ktx); - return; - } - - /* Complete the send in thread context */ - spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail(&ktx->ktx_schedlist, - &kqswnal_data.kqn_donetxds); - wake_up(&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, flags); -} - -static void -kqswnal_txhandler(EP_TXD *txd, void *arg, int status) -{ - kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg; - kqswnal_rpc_reply_t *reply; - - LASSERT (txd != NULL); - LASSERT (ktx != NULL); - - CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status); - - if (status != EP_SUCCESS) { - - CDEBUG (D_NETERROR, "Tx completion to %s failed: %d\n", - libcfs_nid2str(ktx->ktx_nid), status); - - status = -EHOSTDOWN; - - } else switch (ktx->ktx_state) { - - case KTX_GETTING: - case KTX_PUTTING: - /* RPC complete! 
*/ - reply = (kqswnal_rpc_reply_t *)ep_txd_statusblk(txd); - if (reply->msg.magic == 0) { /* "old" peer */ - status = reply->msg.status; - break; - } - - if (reply->msg.magic != LNET_PROTO_QSW_MAGIC) { - if (reply->msg.magic != swab32(LNET_PROTO_QSW_MAGIC)) { - CERROR("%s unexpected rpc reply magic %08x\n", - libcfs_nid2str(ktx->ktx_nid), - reply->msg.magic); - status = -EPROTO; - break; - } - - __swab32s(&reply->msg.status); - __swab32s(&reply->msg.version); - - if (ktx->ktx_state == KTX_GETTING) { - __swab32s(&reply->msg.u.get.len); - __swab32s(&reply->msg.u.get.cksum); - } - } - - status = reply->msg.status; - if (status != 0) { - CERROR("%s RPC status %08x\n", - libcfs_nid2str(ktx->ktx_nid), status); - break; - } - - if (ktx->ktx_state == KTX_GETTING) { - lnet_set_reply_msg_len(kqswnal_data.kqn_ni, - (lnet_msg_t *)ktx->ktx_args[2], - reply->msg.u.get.len); -#if KQSW_CKSUM - ktx->ktx_cksum = reply->msg.u.get.cksum; -#endif - } - break; - - case KTX_SENDING: - status = 0; - break; - - default: - LBUG(); - break; - } - - kqswnal_tx_done(ktx, status); -} - -int -kqswnal_launch (kqswnal_tx_t *ktx) -{ - /* Don't block for transmit descriptor if we're in interrupt context */ - int attr = in_interrupt() ? 
(EP_NO_SLEEP | EP_NO_ALLOC) : 0; - int dest = kqswnal_nid2elanid (ktx->ktx_nid); - unsigned long flags; - int rc; - - ktx->ktx_launchtime = jiffies; - - if (kqswnal_data.kqn_shuttingdown) - return (-ESHUTDOWN); - - LASSERT (dest >= 0); /* must be a peer */ - - if (ktx->ktx_nmappedpages != 0) - attr = EP_SET_PREFRAIL(attr, ktx->ktx_rail); - - switch (ktx->ktx_state) { - case KTX_GETTING: - case KTX_PUTTING: - if (the_lnet.ln_testprotocompat != 0 && - the_lnet.ln_ptlcompat == 0) { - kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer; - - /* single-shot proto test: - * Future version queries will use an RPC, so I'll - * co-opt one of the existing ones */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - msg->kqm_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - msg->kqm_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t. - * The other frags are the payload, awaiting RDMA */ - rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest, - ktx->ktx_port, attr, - kqswnal_txhandler, ktx, - NULL, ktx->ktx_frags, 1); - break; - - case KTX_SENDING: - rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest, - ktx->ktx_port, attr, - kqswnal_txhandler, ktx, - NULL, ktx->ktx_frags, ktx->ktx_nfrag); - break; - - default: - LBUG(); - rc = -EINVAL; /* no compiler warning please */ - break; - } - - switch (rc) { - case EP_SUCCESS: /* success */ - return (0); - - case EP_ENOMEM: /* can't allocate ep txd => queue for later */ - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&ktx->ktx_schedlist, &kqswnal_data.kqn_delayedtxds); - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - return (0); - - default: /* fatal error */ - CDEBUG (D_NETERROR, "Tx to %s failed: %d\n", libcfs_nid2str(ktx->ktx_nid), rc); - kqswnal_notify_peer_down(ktx); - return 
(-EHOSTUNREACH); - } -} - -#if 0 -static char * -hdr_type_string (lnet_hdr_t *hdr) -{ - switch (hdr->type) { - case LNET_MSG_ACK: - return ("ACK"); - case LNET_MSG_PUT: - return ("PUT"); - case LNET_MSG_GET: - return ("GET"); - case LNET_MSG_REPLY: - return ("REPLY"); - default: - return ("<UNKNOWN>"); - } -} - -static void -kqswnal_cerror_hdr(lnet_hdr_t * hdr) -{ - char *type_str = hdr_type_string (hdr); - - CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str, - le32_to_cpu(hdr->payload_length)); - CERROR(" From nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->src_nid), - le32_to_cpu(hdr->src_pid)); - CERROR(" To nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->dest_nid), - le32_to_cpu(hdr->dest_pid)); - - switch (le32_to_cpu(hdr->type)) { - case LNET_MSG_PUT: - CERROR(" Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPX64"\n", - le32_to_cpu(hdr->msg.put.ptl_index), - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - le64_to_cpu(hdr->msg.put.match_bits)); - CERROR(" offset %d, hdr data "LPX64"\n", - le32_to_cpu(hdr->msg.put.offset), - hdr->msg.put.hdr_data); - break; - - case LNET_MSG_GET: - CERROR(" Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPX64"\n", - le32_to_cpu(hdr->msg.get.ptl_index), - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CERROR(" Length %d, src offset %d\n", - le32_to_cpu(hdr->msg.get.sink_length), - le32_to_cpu(hdr->msg.get.src_offset)); - break; - - case LNET_MSG_ACK: - CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - le32_to_cpu(hdr->msg.ack.mlength)); - break; - - case LNET_MSG_REPLY: - CERROR(" dst md "LPX64"."LPX64"\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie); - } - -} /* end of print_hdr() */ -#endif - -int -kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag, - int nrfrag, EP_NMD 
*rfrag) -{ - int i; - - if (nlfrag != nrfrag) { - CERROR("Can't cope with unequal # frags: %d local %d remote\n", - nlfrag, nrfrag); - return (-EINVAL); - } - - for (i = 0; i < nlfrag; i++) - if (lfrag[i].nmd_len != rfrag[i].nmd_len) { - CERROR("Can't cope with unequal frags %d(%d):" - " %d local %d remote\n", - i, nlfrag, lfrag[i].nmd_len, rfrag[i].nmd_len); - return (-EINVAL); - } - - return (0); -} - -kqswnal_remotemd_t * -kqswnal_get_portalscompat_rmd (kqswnal_rx_t *krx) -{ - /* Check that the RMD sent after the "raw" LNET header in a - * portals-compatible QSWLND message is OK */ - char *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page); - kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + sizeof(lnet_hdr_t)); - - /* Note RDMA addresses are sent in native endian-ness in the "old" - * portals protocol so no swabbing... */ - - if (buffer + krx->krx_nob < (char *)(rmd + 1)) { - /* msg too small to discover rmd size */ - CERROR ("Incoming message [%d] too small for RMD (%d needed)\n", - krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer)); - return (NULL); - } - - if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) { - /* rmd doesn't fit in the incoming message */ - CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n", - krx->krx_nob, rmd->kqrmd_nfrag, - (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer)); - return (NULL); - } - - return (rmd); -} - -void -kqswnal_rdma_store_complete (EP_RXD *rxd) -{ - int status = ep_rxd_status(rxd); - kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - - CDEBUG((status == EP_SUCCESS) ? 
D_NET : D_ERROR, - "rxd %p, ktx %p, status %d\n", rxd, ktx, status); - - LASSERT (ktx->ktx_state == KTX_RDMA_STORE); - LASSERT (krx->krx_rxd == rxd); - LASSERT (krx->krx_rpc_reply_needed); - - krx->krx_rpc_reply_needed = 0; - kqswnal_rx_decref (krx); - - /* free ktx & finalize() its lnet_msg_t */ - kqswnal_tx_done(ktx, (status == EP_SUCCESS) ? 0 : -ECONNABORTED); -} - -void -kqswnal_rdma_fetch_complete (EP_RXD *rxd) -{ - /* Completed fetching the PUT/REPLY data */ - int status = ep_rxd_status(rxd); - kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0]; - - CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR, - "rxd %p, ktx %p, status %d\n", rxd, ktx, status); - - LASSERT (ktx->ktx_state == KTX_RDMA_FETCH); - LASSERT (krx->krx_rxd == rxd); - /* RPC completes with failure by default */ - LASSERT (krx->krx_rpc_reply_needed); - LASSERT (krx->krx_rpc_reply.msg.status != 0); - - if (status == EP_SUCCESS) { - krx->krx_rpc_reply.msg.status = 0; - status = 0; - } else { - /* Abandon RPC since get failed */ - krx->krx_rpc_reply_needed = 0; - status = -ECONNABORTED; - } - - /* krx gets decref'd in kqswnal_tx_done_in_thread_context() */ - LASSERT (krx->krx_state == KRX_PARSE); - krx->krx_state = KRX_COMPLETING; - - /* free ktx & finalize() its lnet_msg_t */ - kqswnal_tx_done(ktx, status); -} - -int -kqswnal_rdma (kqswnal_rx_t *krx, lnet_msg_t *lntmsg, - int type, kqswnal_remotemd_t *rmd, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int len) -{ - kqswnal_tx_t *ktx; - int eprc; - int rc; - - /* Not both mapped and paged payload */ - LASSERT (iov == NULL || kiov == NULL); - /* RPC completes with failure by default */ - LASSERT (krx->krx_rpc_reply_needed); - LASSERT (krx->krx_rpc_reply.msg.status != 0); - - if (len == 0) { - /* data got truncated to nothing. 
*/ - lnet_finalize(kqswnal_data.kqn_ni, lntmsg, 0); - /* Let kqswnal_rx_done() complete the RPC with success */ - krx->krx_rpc_reply.msg.status = 0; - return (0); - } - - /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not - actually sending a portals message with it */ - ktx = kqswnal_get_idle_tx(); - if (ktx == NULL) { - CERROR ("Can't get txd for RDMA with %s\n", - libcfs_nid2str(kqswnal_rx_nid(krx))); - return (-ENOMEM); - } - - ktx->ktx_state = type; - ktx->ktx_nid = kqswnal_rx_nid(krx); - ktx->ktx_args[0] = krx; - ktx->ktx_args[1] = lntmsg; - - LASSERT (atomic_read(&krx->krx_refcount) > 0); - /* Take an extra ref for the completion callback */ - atomic_inc(&krx->krx_refcount); - - /* Map on the rail the RPC prefers */ - ktx->ktx_rail = ep_rcvr_prefrail(krx->krx_eprx, - ep_rxd_railmask(krx->krx_rxd)); - - /* Start mapping at offset 0 (we're not mapping any headers) */ - ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0; - - if (kiov != NULL) - rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov); - else - rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov); - - if (rc != 0) { - CERROR ("Can't map local RDMA data: %d\n", rc); - goto out; - } - - rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags, - rmd->kqrmd_nfrag, rmd->kqrmd_frag); - if (rc != 0) { - CERROR ("Incompatible RDMA descriptors\n"); - goto out; - } - - switch (type) { - default: - LBUG(); - - case KTX_RDMA_STORE: - krx->krx_rpc_reply.msg.status = 0; - krx->krx_rpc_reply.msg.magic = LNET_PROTO_QSW_MAGIC; - krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION; - krx->krx_rpc_reply.msg.u.get.len = len; -#if KQSW_CKSUM - krx->krx_rpc_reply.msg.u.get.cksum = (kiov != NULL) ? 
- kqswnal_csum_kiov(~0, offset, len, niov, kiov) : - kqswnal_csum_iov(~0, offset, len, niov, iov); - if (*kqswnal_tunables.kqn_inject_csum_error == 4) { - krx->krx_rpc_reply.msg.u.get.cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } -#endif - eprc = ep_complete_rpc(krx->krx_rxd, - kqswnal_rdma_store_complete, ktx, - &krx->krx_rpc_reply.ep_statusblk, - ktx->ktx_frags, rmd->kqrmd_frag, - rmd->kqrmd_nfrag); - if (eprc != EP_SUCCESS) { - CERROR("can't complete RPC: %d\n", eprc); - /* don't re-attempt RPC completion */ - krx->krx_rpc_reply_needed = 0; - rc = -ECONNABORTED; - } - break; - - case KTX_RDMA_FETCH: - eprc = ep_rpc_get (krx->krx_rxd, - kqswnal_rdma_fetch_complete, ktx, - rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag); - if (eprc != EP_SUCCESS) { - CERROR("ep_rpc_get failed: %d\n", eprc); - /* Don't attempt RPC completion: - * EKC nuked it when the get failed */ - krx->krx_rpc_reply_needed = 0; - rc = -ECONNABORTED; - } - break; - } - - out: - if (rc != 0) { - kqswnal_rx_decref(krx); /* drop callback's ref */ - kqswnal_put_idle_tx (ktx); - } - - atomic_dec(&kqswnal_data.kqn_pending_txs); - return (rc); -} - -int -kqswnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - int nob; - kqswnal_tx_t *ktx; - int rc; - - /* NB 1. hdr is in network byte order */ - /* 2. 
'private' depends on the message type */ - - CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* It must be OK to kmap() if required */ - LASSERT (payload_kiov == NULL || !in_interrupt ()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - if (kqswnal_nid2elanid (target.nid) < 0) { - CERROR("%s not in my cluster\n", libcfs_nid2str(target.nid)); - return -EIO; - } - - /* I may not block for a transmit descriptor if I might block the - * router, receiver, or an interrupt handler. */ - ktx = kqswnal_get_idle_tx(); - if (ktx == NULL) { - CERROR ("Can't get txd for msg type %d for %s\n", - type, libcfs_nid2str(target.nid)); - return (-ENOMEM); - } - - ktx->ktx_state = KTX_SENDING; - ktx->ktx_nid = target.nid; - ktx->ktx_args[0] = private; - ktx->ktx_args[1] = lntmsg; - ktx->ktx_args[2] = NULL; /* set when a GET commits to REPLY */ - - /* The first frag will be the pre-mapped buffer. */ - ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1; - - if ((!target_is_router && /* target.nid is final dest */ - !routing && /* I'm the source */ - type == LNET_MSG_GET && /* optimize GET? */ - *kqswnal_tunables.kqn_optimized_gets != 0 && - lntmsg->msg_md->md_length >= - *kqswnal_tunables.kqn_optimized_gets) || - ((type == LNET_MSG_PUT || /* optimize PUT? */ - type == LNET_MSG_REPLY) && /* optimize REPLY? */ - *kqswnal_tunables.kqn_optimized_puts != 0 && - payload_nob >= *kqswnal_tunables.kqn_optimized_puts)) { - lnet_libmd_t *md = lntmsg->msg_md; - kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer; - lnet_hdr_t *mhdr; - kqswnal_remotemd_t *rmd; - - /* Optimised path: I send over the Elan vaddrs of the local - * buffers, and my peer DMAs directly to/from them. - * - * First I set up ktx as if it was going to send this - * payload, (it needs to map it anyway). 
This fills - * ktx_frags[1] and onward with the network addresses - * of the buffer frags. */ - - if (the_lnet.ln_ptlcompat == 2) { - /* Strong portals compatibility: send "raw" LNET - * header + rdma descriptor */ - mhdr = (lnet_hdr_t *)ktx->ktx_buffer; - rmd = (kqswnal_remotemd_t *)(mhdr + 1); - } else { - /* Send an RDMA message */ - msg->kqm_magic = LNET_PROTO_QSW_MAGIC; - msg->kqm_version = QSWLND_PROTO_VERSION; - msg->kqm_type = QSWLND_MSG_RDMA; - - mhdr = &msg->kqm_u.rdma.kqrm_hdr; - rmd = &msg->kqm_u.rdma.kqrm_rmd; - } - - *mhdr = *hdr; - nob = (((char *)rmd) - ktx->ktx_buffer); - - if (type == LNET_MSG_GET) { - if ((md->md_options & LNET_MD_KIOV) != 0) - rc = kqswnal_map_tx_kiov (ktx, 0, md->md_length, - md->md_niov, md->md_iov.kiov); - else - rc = kqswnal_map_tx_iov (ktx, 0, md->md_length, - md->md_niov, md->md_iov.iov); - ktx->ktx_state = KTX_GETTING; - } else { - if (payload_kiov != NULL) - rc = kqswnal_map_tx_kiov(ktx, 0, payload_nob, - payload_niov, payload_kiov); - else - rc = kqswnal_map_tx_iov(ktx, 0, payload_nob, - payload_niov, payload_iov); - ktx->ktx_state = KTX_PUTTING; - } - - if (rc != 0) - goto out; - - rmd->kqrmd_nfrag = ktx->ktx_nfrag - 1; - nob += offsetof(kqswnal_remotemd_t, - kqrmd_frag[rmd->kqrmd_nfrag]); - LASSERT (nob <= KQSW_TX_BUFFER_SIZE); - - memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1], - rmd->kqrmd_nfrag * sizeof(EP_NMD)); - - ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob); -#if KQSW_CKSUM - LASSERT (the_lnet.ln_ptlcompat != 2); - msg->kqm_nob = nob + payload_nob; - msg->kqm_cksum = 0; - msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob); -#endif - if (type == LNET_MSG_GET) { - /* Allocate reply message now while I'm in thread context */ - ktx->ktx_args[2] = lnet_create_reply_msg ( - kqswnal_data.kqn_ni, lntmsg); - if (ktx->ktx_args[2] == NULL) - goto out; - - /* NB finalizing the REPLY message is my - * responsibility now, whatever happens. 
*/ -#if KQSW_CKSUM - if (*kqswnal_tunables.kqn_inject_csum_error == 3) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } - - } else if (payload_kiov != NULL) { - /* must checksum payload after header so receiver can - * compute partial header cksum before swab. Sadly - * this causes 2 rounds of kmap */ - msg->kqm_cksum = - kqswnal_csum_kiov(msg->kqm_cksum, 0, payload_nob, - payload_niov, payload_kiov); - if (*kqswnal_tunables.kqn_inject_csum_error == 2) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } - } else { - msg->kqm_cksum = - kqswnal_csum_iov(msg->kqm_cksum, 0, payload_nob, - payload_niov, payload_iov); - if (*kqswnal_tunables.kqn_inject_csum_error == 2) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } -#endif - } - - } else if (payload_nob <= *kqswnal_tunables.kqn_tx_maxcontig) { - lnet_hdr_t *mhdr; - char *payload; - kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer; - - /* small message: single frag copied into the pre-mapped buffer */ - if (the_lnet.ln_ptlcompat == 2) { - /* Strong portals compatibility: send "raw" LNET header - * + payload */ - mhdr = (lnet_hdr_t *)ktx->ktx_buffer; - payload = (char *)(mhdr + 1); - } else { - /* Send an IMMEDIATE message */ - msg->kqm_magic = LNET_PROTO_QSW_MAGIC; - msg->kqm_version = QSWLND_PROTO_VERSION; - msg->kqm_type = QSWLND_MSG_IMMEDIATE; - - mhdr = &msg->kqm_u.immediate.kqim_hdr; - payload = msg->kqm_u.immediate.kqim_payload; - } - - *mhdr = *hdr; - nob = (payload - ktx->ktx_buffer) + payload_nob; - - ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob); - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(KQSW_TX_BUFFER_SIZE, payload, 0, - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(KQSW_TX_BUFFER_SIZE, payload, 0, - payload_niov, payload_iov, - payload_offset, payload_nob); -#if KQSW_CKSUM - LASSERT (the_lnet.ln_ptlcompat != 2); - msg->kqm_nob = nob; - msg->kqm_cksum = 0; - 
msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob); - if (*kqswnal_tunables.kqn_inject_csum_error == 1) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } -#endif - } else { - lnet_hdr_t *mhdr; - kqswnal_msg_t *msg = (kqswnal_msg_t *)ktx->ktx_buffer; - - /* large message: multiple frags: first is hdr in pre-mapped buffer */ - if (the_lnet.ln_ptlcompat == 2) { - /* Strong portals compatibility: send "raw" LNET header - * + payload */ - mhdr = (lnet_hdr_t *)ktx->ktx_buffer; - nob = sizeof(lnet_hdr_t); - } else { - /* Send an IMMEDIATE message */ - msg->kqm_magic = LNET_PROTO_QSW_MAGIC; - msg->kqm_version = QSWLND_PROTO_VERSION; - msg->kqm_type = QSWLND_MSG_IMMEDIATE; - - mhdr = &msg->kqm_u.immediate.kqim_hdr; - nob = offsetof(kqswnal_msg_t, - kqm_u.immediate.kqim_payload); - } - - *mhdr = *hdr; - - ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer, 0, nob); - - if (payload_kiov != NULL) - rc = kqswnal_map_tx_kiov (ktx, payload_offset, payload_nob, - payload_niov, payload_kiov); - else - rc = kqswnal_map_tx_iov (ktx, payload_offset, payload_nob, - payload_niov, payload_iov); - if (rc != 0) - goto out; - -#if KQSW_CKSUM - msg->kqm_nob = nob + payload_nob; - msg->kqm_cksum = 0; - msg->kqm_cksum = kqswnal_csum(~0, (char *)msg, nob); - - msg->kqm_cksum = (payload_kiov != NULL) ? - kqswnal_csum_kiov(msg->kqm_cksum, - payload_offset, payload_nob, - payload_niov, payload_kiov) : - kqswnal_csum_iov(msg->kqm_cksum, - payload_offset, payload_nob, - payload_niov, payload_iov); - - if (*kqswnal_tunables.kqn_inject_csum_error == 1) { - msg->kqm_cksum++; - *kqswnal_tunables.kqn_inject_csum_error = 0; - } -#endif - nob += payload_nob; - } - - ktx->ktx_port = (nob <= KQSW_SMALLMSG) ? - EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE; - - rc = kqswnal_launch (ktx); - - out: - CDEBUG(rc == 0 ? D_NET : D_NETERROR, "%s %d bytes to %s%s: rc %d\n", - routing ? (rc == 0 ? "Routed" : "Failed to route") : - (rc == 0 ? 
"Sent" : "Failed to send"), - nob, libcfs_nid2str(target.nid), - target_is_router ? "(router)" : "", rc); - - if (rc != 0) { - lnet_msg_t *repmsg = (lnet_msg_t *)ktx->ktx_args[2]; - int state = ktx->ktx_state; - - kqswnal_put_idle_tx (ktx); - - if (state == KTX_GETTING && repmsg != NULL) { - /* We committed to reply, but there was a problem - * launching the GET. We can't avoid delivering a - * REPLY event since we committed above, so we - * pretend the GET succeeded but the REPLY - * failed. */ - rc = 0; - lnet_finalize (kqswnal_data.kqn_ni, lntmsg, 0); - lnet_finalize (kqswnal_data.kqn_ni, repmsg, -EIO); - } - - } - - atomic_dec(&kqswnal_data.kqn_pending_txs); - return (rc == 0 ? 0 : -EIO); -} - -void -kqswnal_requeue_rx (kqswnal_rx_t *krx) -{ - LASSERT (atomic_read(&krx->krx_refcount) == 0); - LASSERT (!krx->krx_rpc_reply_needed); - - krx->krx_state = KRX_POSTED; - - if (kqswnal_data.kqn_shuttingdown) { - /* free EKC rxd on shutdown */ - ep_complete_receive(krx->krx_rxd); - } else { - /* repost receive */ - ep_requeue_receive(krx->krx_rxd, - kqswnal_rxhandler, krx, - &krx->krx_elanbuffer, 0); - } -} - -void -kqswnal_rpc_complete (EP_RXD *rxd) -{ - int status = ep_rxd_status(rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg(rxd); - - CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR, - "rxd %p, krx %p, status %d\n", rxd, krx, status); - - LASSERT (krx->krx_rxd == rxd); - LASSERT (krx->krx_rpc_reply_needed); - - krx->krx_rpc_reply_needed = 0; - kqswnal_requeue_rx (krx); -} - -void -kqswnal_rx_done (kqswnal_rx_t *krx) -{ - int rc; - - LASSERT (atomic_read(&krx->krx_refcount) == 0); - - if (krx->krx_rpc_reply_needed) { - /* We've not completed the peer's RPC yet... 
*/ - krx->krx_rpc_reply.msg.magic = LNET_PROTO_QSW_MAGIC; - krx->krx_rpc_reply.msg.version = QSWLND_PROTO_VERSION; - - LASSERT (!in_interrupt()); - - rc = ep_complete_rpc(krx->krx_rxd, - kqswnal_rpc_complete, krx, - &krx->krx_rpc_reply.ep_statusblk, - NULL, NULL, 0); - if (rc == EP_SUCCESS) - return; - - CERROR("can't complete RPC: %d\n", rc); - krx->krx_rpc_reply_needed = 0; - } - - kqswnal_requeue_rx(krx); -} - -void -kqswnal_parse (kqswnal_rx_t *krx) -{ - lnet_ni_t *ni = kqswnal_data.kqn_ni; - kqswnal_msg_t *msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page); - lnet_nid_t fromnid = kqswnal_rx_nid(krx); - int swab; - int n; - int i; - int nob; - int rc; - - LASSERT (atomic_read(&krx->krx_refcount) == 1); - - /* If ln_ptlcompat is set, peers may send me an "old" unencapsulated - * lnet hdr */ - LASSERT (offsetof(kqswnal_msg_t, kqm_u) <= sizeof(lnet_hdr_t)); - - if (krx->krx_nob < offsetof(kqswnal_msg_t, kqm_u)) { - CERROR("Short message %d received from %s\n", - krx->krx_nob, libcfs_nid2str(fromnid)); - goto done; - } - - swab = msg->kqm_magic == __swab32(LNET_PROTO_QSW_MAGIC); - - if (swab || msg->kqm_magic == LNET_PROTO_QSW_MAGIC) { -#if KQSW_CKSUM - __u32 csum0; - __u32 csum1; - - /* csum byte array before swab */ - csum1 = msg->kqm_cksum; - msg->kqm_cksum = 0; - csum0 = kqswnal_csum_kiov(~0, 0, krx->krx_nob, - krx->krx_npages, krx->krx_kiov); - msg->kqm_cksum = csum1; -#endif - - if (swab) { - __swab16s(&msg->kqm_version); - __swab16s(&msg->kqm_type); -#if KQSW_CKSUM - __swab32s(&msg->kqm_cksum); - __swab32s(&msg->kqm_nob); -#endif - } - - if (msg->kqm_version != QSWLND_PROTO_VERSION) { - /* Future protocol version compatibility support! - * The next qswlnd-specific protocol rev will first - * send an RPC to check version. - * 1.4.6 and 1.4.7.early reply with a status - * block containing its current version. 
- * Later versions send a failure (-ve) status + - * magic/version */ - - if (!krx->krx_rpc_reply_needed) { - CERROR("Unexpected version %d from %s\n", - msg->kqm_version, libcfs_nid2str(fromnid)); - goto done; - } - - LASSERT (krx->krx_rpc_reply.msg.status == -EPROTO); - goto done; - } - - switch (msg->kqm_type) { - default: - CERROR("Bad request type %x from %s\n", - msg->kqm_type, libcfs_nid2str(fromnid)); - goto done; - - case QSWLND_MSG_IMMEDIATE: - if (krx->krx_rpc_reply_needed) { - /* Should have been a simple message */ - CERROR("IMMEDIATE sent as RPC from %s\n", - libcfs_nid2str(fromnid)); - goto done; - } - - nob = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload); - if (krx->krx_nob < nob) { - CERROR("Short IMMEDIATE %d(%d) from %s\n", - krx->krx_nob, nob, libcfs_nid2str(fromnid)); - goto done; - } - -#if KQSW_CKSUM - if (csum0 != msg->kqm_cksum) { - CERROR("Bad IMMEDIATE checksum %08x(%08x) from %s\n", - csum0, msg->kqm_cksum, libcfs_nid2str(fromnid)); - CERROR("nob %d (%d)\n", krx->krx_nob, msg->kqm_nob); - goto done; - } -#endif - rc = lnet_parse(ni, &msg->kqm_u.immediate.kqim_hdr, - fromnid, krx, 0); - if (rc < 0) - goto done; - return; - - case QSWLND_MSG_RDMA: - if (!krx->krx_rpc_reply_needed) { - /* Should have been a simple message */ - CERROR("RDMA sent as simple message from %s\n", - libcfs_nid2str(fromnid)); - goto done; - } - - nob = offsetof(kqswnal_msg_t, - kqm_u.rdma.kqrm_rmd.kqrmd_frag[0]); - if (krx->krx_nob < nob) { - CERROR("Short RDMA message %d(%d) from %s\n", - krx->krx_nob, nob, libcfs_nid2str(fromnid)); - goto done; - } - - if (swab) - __swab32s(&msg->kqm_u.rdma.kqrm_rmd.kqrmd_nfrag); - - n = msg->kqm_u.rdma.kqrm_rmd.kqrmd_nfrag; - nob = offsetof(kqswnal_msg_t, - kqm_u.rdma.kqrm_rmd.kqrmd_frag[n]); - - if (krx->krx_nob < nob) { - CERROR("short RDMA message %d(%d) from %s\n", - krx->krx_nob, nob, libcfs_nid2str(fromnid)); - goto done; - } - - if (swab) { - for (i = 0; i < n; i++) { - EP_NMD *nmd = 
&msg->kqm_u.rdma.kqrm_rmd.kqrmd_frag[i]; - - __swab32s(&nmd->nmd_addr); - __swab32s(&nmd->nmd_len); - __swab32s(&nmd->nmd_attr); - } - } - -#if KQSW_CKSUM - krx->krx_cksum = csum0; /* stash checksum so far */ -#endif - rc = lnet_parse(ni, &msg->kqm_u.rdma.kqrm_hdr, - fromnid, krx, 1); - if (rc < 0) - goto done; - return; - } - /* Not Reached */ - } - - if (msg->kqm_magic == LNET_PROTO_MAGIC || - msg->kqm_magic == __swab32(LNET_PROTO_MAGIC)) { - /* Future protocol version compatibility support! - * When LNET unifies protocols over all LNDs, the first thing a - * peer will send will be a version query RPC. - * 1.4.6 and 1.4.7.early reply with a status block containing - * LNET_PROTO_QSW_MAGIC.. - * Later versions send a failure (-ve) status + - * magic/version */ - - if (!krx->krx_rpc_reply_needed) { - CERROR("Unexpected magic %08x from %s\n", - msg->kqm_magic, libcfs_nid2str(fromnid)); - goto done; - } - - LASSERT (krx->krx_rpc_reply.msg.status == -EPROTO); - goto done; - } - - if (the_lnet.ln_ptlcompat != 0) { - /* Portals compatibility (strong or weak) - * This could be an unencapsulated LNET header. 
If it's big - * enough, let LNET's parser sort it out */ - - if (krx->krx_nob < sizeof(lnet_hdr_t)) { - CERROR("Short portals-compatible message from %s\n", - libcfs_nid2str(fromnid)); - goto done; - } - - krx->krx_raw_lnet_hdr = 1; - rc = lnet_parse(ni, (lnet_hdr_t *)msg, - fromnid, krx, krx->krx_rpc_reply_needed); - if (rc < 0) - goto done; - return; - } - - CERROR("Unrecognised magic %08x from %s\n", - msg->kqm_magic, libcfs_nid2str(fromnid)); - done: - kqswnal_rx_decref(krx); -} - -/* Receive Interrupt Handler: posts to schedulers */ -void -kqswnal_rxhandler(EP_RXD *rxd) -{ - unsigned long flags; - int nob = ep_rxd_len (rxd); - int status = ep_rxd_status (rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd); - CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n", - rxd, krx, nob, status); - - LASSERT (krx != NULL); - LASSERT (krx->krx_state == KRX_POSTED); - - krx->krx_state = KRX_PARSE; - krx->krx_rxd = rxd; - krx->krx_nob = nob; - krx->krx_raw_lnet_hdr = 0; - - /* RPC reply iff rpc request received without error */ - krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd) && - (status == EP_SUCCESS || - status == EP_MSG_TOO_BIG); - - /* Default to failure if an RPC reply is requested but not handled */ - krx->krx_rpc_reply.msg.status = -EPROTO; - atomic_set (&krx->krx_refcount, 1); - - if (status != EP_SUCCESS) { - /* receives complete with failure when receiver is removed */ - if (status == EP_SHUTDOWN) - LASSERT (kqswnal_data.kqn_shuttingdown); - else - CERROR("receive status failed with status %d nob %d\n", - ep_rxd_status(rxd), nob); - kqswnal_rx_decref(krx); - return; - } - - if (!in_interrupt()) { - kqswnal_parse(krx); - return; - } - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds); - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -} - -int -kqswnal_recv (lnet_ni_t *ni, - void *private, - lnet_msg_t 
*lntmsg, - int delayed, - unsigned int niov, - struct iovec *iov, - lnet_kiov_t *kiov, - unsigned int offset, - unsigned int mlen, - unsigned int rlen) -{ - kqswnal_rx_t *krx = (kqswnal_rx_t *)private; - lnet_nid_t fromnid; - kqswnal_msg_t *msg; - lnet_hdr_t *hdr; - kqswnal_remotemd_t *rmd; - int msg_offset; - int rc; - - LASSERT (!in_interrupt ()); /* OK to map */ - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - fromnid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ep_rxd_node(krx->krx_rxd)); - msg = (kqswnal_msg_t *)page_address(krx->krx_kiov[0].kiov_page); - - if (krx->krx_rpc_reply_needed) { - /* optimized (rdma) request sent as RPC */ - - if (krx->krx_raw_lnet_hdr) { - LASSERT (the_lnet.ln_ptlcompat != 0); - hdr = (lnet_hdr_t *)msg; - rmd = kqswnal_get_portalscompat_rmd(krx); - if (rmd == NULL) - return (-EPROTO); - } else { - LASSERT (msg->kqm_type == QSWLND_MSG_RDMA); - hdr = &msg->kqm_u.rdma.kqrm_hdr; - rmd = &msg->kqm_u.rdma.kqrm_rmd; - } - - /* NB header is still in wire byte order */ - - switch (le32_to_cpu(hdr->type)) { - case LNET_MSG_PUT: - case LNET_MSG_REPLY: - /* This is an optimized PUT/REPLY */ - rc = kqswnal_rdma(krx, lntmsg, - KTX_RDMA_FETCH, rmd, - niov, iov, kiov, offset, mlen); - break; - - case LNET_MSG_GET: -#if KQSW_CKSUM - if (krx->krx_cksum != msg->kqm_cksum) { - CERROR("Bad GET checksum %08x(%08x) from %s\n", - krx->krx_cksum, msg->kqm_cksum, - libcfs_nid2str(fromnid)); - rc = -EIO; - break; - } -#endif - if (lntmsg == NULL) { - /* No buffer match: my decref will - * complete the RPC with failure */ - rc = 0; - } else { - /* Matched something! 
*/ - rc = kqswnal_rdma(krx, lntmsg, - KTX_RDMA_STORE, rmd, - lntmsg->msg_niov, - lntmsg->msg_iov, - lntmsg->msg_kiov, - lntmsg->msg_offset, - lntmsg->msg_len); - } - break; - - default: - CERROR("Bad RPC type %d\n", - le32_to_cpu(hdr->type)); - rc = -EPROTO; - break; - } - - kqswnal_rx_decref(krx); - return rc; - } - - if (krx->krx_raw_lnet_hdr) { - LASSERT (the_lnet.ln_ptlcompat != 0); - msg_offset = sizeof(lnet_hdr_t); - } else { - LASSERT (msg->kqm_type == QSWLND_MSG_IMMEDIATE); - msg_offset = offsetof(kqswnal_msg_t, kqm_u.immediate.kqim_payload); - } - - if (krx->krx_nob < msg_offset + rlen) { - CERROR("Bad message size from %s: have %d, need %d + %d\n", - libcfs_nid2str(fromnid), krx->krx_nob, - msg_offset, rlen); - kqswnal_rx_decref(krx); - return -EPROTO; - } - - if (kiov != NULL) - lnet_copy_kiov2kiov(niov, kiov, offset, - krx->krx_npages, krx->krx_kiov, - msg_offset, mlen); - else - lnet_copy_kiov2iov(niov, iov, offset, - krx->krx_npages, krx->krx_kiov, - msg_offset, mlen); - - lnet_finalize(ni, lntmsg, 0); - kqswnal_rx_decref(krx); - return 0; -} - -int -kqswnal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kqswnal_data.kqn_nthreads); - return (0); -} - -void -kqswnal_thread_fini (void) -{ - atomic_dec (&kqswnal_data.kqn_nthreads); -} - -int -kqswnal_scheduler (void *arg) -{ - kqswnal_rx_t *krx; - kqswnal_tx_t *ktx; - unsigned long flags; - int rc; - int counter = 0; - int did_something; - - cfs_daemonize ("kqswnal_sched"); - cfs_block_allsigs (); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - for (;;) - { - did_something = 0; - - if (!list_empty (&kqswnal_data.kqn_readyrxds)) - { - krx = list_entry(kqswnal_data.kqn_readyrxds.next, - kqswnal_rx_t, krx_list); - list_del (&krx->krx_list); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - LASSERT (krx->krx_state == KRX_PARSE); - kqswnal_parse (krx); - - did_something = 1; - 
spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags); - } - - if (!list_empty (&kqswnal_data.kqn_donetxds)) - { - ktx = list_entry(kqswnal_data.kqn_donetxds.next, - kqswnal_tx_t, ktx_schedlist); - list_del_init (&ktx->ktx_schedlist); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - kqswnal_tx_done_in_thread_context(ktx); - - did_something = 1; - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - - if (!list_empty (&kqswnal_data.kqn_delayedtxds)) - { - ktx = list_entry(kqswnal_data.kqn_delayedtxds.next, - kqswnal_tx_t, ktx_schedlist); - list_del_init (&ktx->ktx_schedlist); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - rc = kqswnal_launch (ktx); - if (rc != 0) { - CERROR("Failed delayed transmit to %s: %d\n", - libcfs_nid2str(ktx->ktx_nid), rc); - kqswnal_tx_done (ktx, rc); - } - atomic_dec (&kqswnal_data.kqn_pending_txs); - - did_something = 1; - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - - /* nothing to do or hogging CPU */ - if (!did_something || counter++ == KQSW_RESCHED) { - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - counter = 0; - - if (!did_something) { - if (kqswnal_data.kqn_shuttingdown == 2) { - /* We only exit in stage 2 of shutdown when - * there's nothing left to do */ - break; - } - rc = wait_event_interruptible_exclusive ( - kqswnal_data.kqn_sched_waitq, - kqswnal_data.kqn_shuttingdown == 2 || - !list_empty(&kqswnal_data.kqn_readyrxds) || - !list_empty(&kqswnal_data.kqn_donetxds) || - !list_empty(&kqswnal_data.kqn_delayedtxds)); - LASSERT (rc == 0); - } else if (need_resched()) - schedule (); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - } - - kqswnal_thread_fini (); - return (0); -} diff --git a/lnet/klnds/qswlnd/qswlnd_modparams.c b/lnet/klnds/qswlnd/qswlnd_modparams.c deleted file mode 100644 index 62f89245466998e139a86843eb428969440ace03..0000000000000000000000000000000000000000 --- a/lnet/klnds/qswlnd/qswlnd_modparams.c +++ 
/dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2002-2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswlnd.h" - -static int tx_maxcontig = (1<<10); -CFS_MODULE_PARM(tx_maxcontig, "i", int, 0444, - "maximum payload to de-fragment"); - -static int ntxmsgs = 256; -CFS_MODULE_PARM(ntxmsgs, "i", int, 0444, - "# 'normal' tx msg buffers"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# per-peer concurrent sends"); - -static int nrxmsgs_large = 64; -CFS_MODULE_PARM(nrxmsgs_large, "i", int, 0444, - "# 'large' rx msg buffers"); - -static int ep_envelopes_large = 256; -CFS_MODULE_PARM(ep_envelopes_large, "i", int, 0444, - "# 'large' rx msg envelope buffers"); - -static int nrxmsgs_small = 256; -CFS_MODULE_PARM(nrxmsgs_small, "i", int, 0444, - "# 'small' rx msg buffers"); - -static int ep_envelopes_small = 2048; -CFS_MODULE_PARM(ep_envelopes_small, "i", int, 0444, - "# 'small' rx msg envelope buffers"); - -static int optimized_puts = (32<<10); -CFS_MODULE_PARM(optimized_puts, "i", int, 0644, - "zero-copy puts >= this size"); - -static int optimized_gets = 2048; -CFS_MODULE_PARM(optimized_gets, "i", int, 
0644, - "zero-copy gets >= this size"); - -#if KQSW_CKSUM -static int inject_csum_error = 0; -CFS_MODULE_PARM(inject_csum_error, "i", int, 0644, - "test checksumming"); -#endif - -kqswnal_tunables_t kqswnal_tunables = { - .kqn_tx_maxcontig = &tx_maxcontig, - .kqn_ntxmsgs = &ntxmsgs, - .kqn_credits = &credits, - .kqn_peercredits = &peer_credits, - .kqn_nrxmsgs_large = &nrxmsgs_large, - .kqn_ep_envelopes_large = &ep_envelopes_large, - .kqn_nrxmsgs_small = &nrxmsgs_small, - .kqn_ep_envelopes_small = &ep_envelopes_small, - .kqn_optimized_puts = &optimized_puts, - .kqn_optimized_gets = &optimized_gets, -#if KQSW_CKSUM - .kqn_inject_csum_error = &inject_csum_error, -#endif -}; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -static ctl_table kqswnal_ctl_table[] = { - {1, "tx_maxcontig", &tx_maxcontig, - sizeof (int), 0444, NULL, &proc_dointvec}, - {2, "ntxmsgs", &ntxmsgs, - sizeof (int), 0444, NULL, &proc_dointvec}, - {3, "credits", &credits, - sizeof (int), 0444, NULL, &proc_dointvec}, - {4, "peer_credits", &peer_credits, - sizeof (int), 0444, NULL, &proc_dointvec}, - {5, "nrxmsgs_large", &nrxmsgs_large, - sizeof (int), 0444, NULL, &proc_dointvec}, - {6, "ep_envelopes_large", &ep_envelopes_large, - sizeof (int), 0444, NULL, &proc_dointvec}, - {7, "nrxmsgs_small", &nrxmsgs_small, - sizeof (int), 0444, NULL, &proc_dointvec}, - {8, "ep_envelopes_small", &ep_envelopes_small, - sizeof (int), 0444, NULL, &proc_dointvec}, - {9, "optimized_puts", &optimized_puts, - sizeof (int), 0644, NULL, &proc_dointvec}, - {10, "optimized_gets", &optimized_gets, - sizeof (int), 0644, NULL, &proc_dointvec}, -#if KQSW_CKSUM - {11, "inject_csum_error", &inject_csum_error, - sizeof (int), 0644, NULL, &proc_dointvec}, -#endif - {0} -}; - -static ctl_table kqswnal_top_ctl_table[] = { - {201, "qswnal", NULL, 0, 0555, kqswnal_ctl_table}, - {0} -}; - -int -kqswnal_tunables_init () -{ - kqswnal_tunables.kqn_sysctl = - register_sysctl_table(kqswnal_top_ctl_table, 0); - - if (kqswnal_tunables.kqn_sysctl 
== NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kqswnal_tunables_fini () -{ - if (kqswnal_tunables.kqn_sysctl != NULL) - unregister_sysctl_table(kqswnal_tunables.kqn_sysctl); -} -#else -int -kqswnal_tunables_init () -{ - return 0; -} - -void -kqswnal_tunables_fini () -{ -} -#endif diff --git a/lnet/klnds/ralnd/.cvsignore b/lnet/klnds/ralnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/ralnd/Makefile.in b/lnet/klnds/ralnd/Makefile.in deleted file mode 100644 index e1f5e8242e6fff49e36014d00cfa2d491d511528..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kralnd -kralnd-objs := ralnd.o ralnd_cb.o ralnd_modparams.o - -EXTRA_POST_CFLAGS := @RACPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/ralnd/autoMakefile.am b/lnet/klnds/ralnd/autoMakefile.am deleted file mode 100644 index 7f3df4c432d71b4596a69a4be76eb5eb609b7530..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if BUILD_RALND -modulenet_DATA = kralnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kralnd-objs:%.o=%.c) ralnd.h diff --git a/lnet/klnds/ralnd/ralnd.c b/lnet/klnds/ralnd/ralnd.c deleted file mode 100644 index a0a4d93f164862731f788505c3457625b3913538..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/ralnd.c +++ /dev/null @@ -1,1741 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#include "ralnd.h" - -static int kranal_devids[RANAL_MAXDEVS] = {RAPK_MAIN_DEVICE_ID, - RAPK_EXPANSION_DEVICE_ID}; - -lnd_t the_kralnd = { - .lnd_type = RALND, - .lnd_startup = kranal_startup, - .lnd_shutdown = kranal_shutdown, - .lnd_ctl = kranal_ctl, - .lnd_send = kranal_send, - .lnd_recv = kranal_recv, - .lnd_eager_recv = kranal_eager_recv, - .lnd_accept = kranal_accept, -}; - -kra_data_t kranal_data; - -void -kranal_pack_connreq(kra_connreq_t *connreq, kra_conn_t *conn, lnet_nid_t dstnid) -{ - RAP_RETURN rrc; - - memset(connreq, 0, sizeof(*connreq)); - - connreq->racr_magic = RANAL_MSG_MAGIC; - connreq->racr_version = RANAL_MSG_VERSION; - - if (conn == NULL) /* prepping a "stub" reply */ - return; - - connreq->racr_devid = conn->rac_device->rad_id; - connreq->racr_srcnid = lnet_ptlcompat_srcnid(kranal_data.kra_ni->ni_nid, - dstnid); - connreq->racr_dstnid = dstnid; - connreq->racr_peerstamp = kranal_data.kra_peerstamp; - connreq->racr_connstamp = conn->rac_my_connstamp; - connreq->racr_timeout = conn->rac_timeout; - - rrc = RapkGetRiParams(conn->rac_rihandle, &connreq->racr_riparams); - LASSERT(rrc == RAP_SUCCESS); -} - -int -kranal_recv_connreq(struct socket *sock, kra_connreq_t *connreq, int active) -{ - int timeout = active ? *kranal_tunables.kra_timeout : - lnet_acceptor_timeout(); - int swab; - int rc; - - /* return 0 on success, -ve on error, +ve to tell the peer I'm "old" */ - - rc = libcfs_sock_read(sock, &connreq->racr_magic, - sizeof(connreq->racr_magic), timeout); - if (rc != 0) { - CERROR("Read(magic) failed(1): %d\n", rc); - return -EIO; - } - - if (connreq->racr_magic != RANAL_MSG_MAGIC && - connreq->racr_magic != __swab32(RANAL_MSG_MAGIC)) { - /* Unexpected magic! */ - if (!active && - the_lnet.ln_ptlcompat == 0 && - (connreq->racr_magic == LNET_PROTO_MAGIC || - connreq->racr_magic == __swab32(LNET_PROTO_MAGIC))) { - /* future protocol version compatibility! 
- * When LNET unifies protocols over all LNDs, the first - * thing sent will be a version query. +ve rc means I - * reply with my current magic/version */ - return EPROTO; - } - - if (active || - the_lnet.ln_ptlcompat == 0) { - CERROR("Unexpected magic %08x (1)\n", - connreq->racr_magic); - return -EPROTO; - } - - /* When portals compatibility is set, I may be passed a new - * connection "blindly" by the acceptor, and I have to - * determine if my peer has sent an acceptor connection request - * or not. This isn't a connreq, so I'll get the acceptor to - * look at it... */ - rc = lnet_accept(kranal_data.kra_ni, sock, connreq->racr_magic); - if (rc != 0) - return -EPROTO; - - /* ...and if it's OK I'm back to looking for a connreq... */ - rc = libcfs_sock_read(sock, &connreq->racr_magic, - sizeof(connreq->racr_magic), timeout); - if (rc != 0) { - CERROR("Read(magic) failed(2): %d\n", rc); - return -EIO; - } - - if (connreq->racr_magic != RANAL_MSG_MAGIC && - connreq->racr_magic != __swab32(RANAL_MSG_MAGIC)) { - CERROR("Unexpected magic %08x(2)\n", - connreq->racr_magic); - return -EPROTO; - } - } - - swab = (connreq->racr_magic == __swab32(RANAL_MSG_MAGIC)); - - rc = libcfs_sock_read(sock, &connreq->racr_version, - sizeof(connreq->racr_version), timeout); - if (rc != 0) { - CERROR("Read(version) failed: %d\n", rc); - return -EIO; - } - - if (swab) - __swab16s(&connreq->racr_version); - - if (connreq->racr_version != RANAL_MSG_VERSION) { - if (active) { - CERROR("Unexpected version %d\n", connreq->racr_version); - return -EPROTO; - } - /* If this is a future version of the ralnd protocol, and I'm - * passive (accepted the connection), tell my peer I'm "old" - * (+ve rc) */ - return EPROTO; - } - - rc = libcfs_sock_read(sock, &connreq->racr_devid, - sizeof(connreq->racr_version) - - offsetof(kra_connreq_t, racr_devid), - timeout); - if (rc != 0) { - CERROR("Read(body) failed: %d\n", rc); - return -EIO; - } - - if (swab) { - __swab32s(&connreq->racr_magic); - 
__swab16s(&connreq->racr_version); - __swab16s(&connreq->racr_devid); - __swab64s(&connreq->racr_srcnid); - __swab64s(&connreq->racr_dstnid); - __swab64s(&connreq->racr_peerstamp); - __swab64s(&connreq->racr_connstamp); - __swab32s(&connreq->racr_timeout); - - __swab32s(&connreq->racr_riparams.HostId); - __swab32s(&connreq->racr_riparams.FmaDomainHndl); - __swab32s(&connreq->racr_riparams.PTag); - __swab32s(&connreq->racr_riparams.CompletionCookie); - } - - if (connreq->racr_srcnid == LNET_NID_ANY || - connreq->racr_dstnid == LNET_NID_ANY) { - CERROR("Received LNET_NID_ANY\n"); - return -EPROTO; - } - - if (connreq->racr_timeout < RANAL_MIN_TIMEOUT) { - CERROR("Received timeout %d < MIN %d\n", - connreq->racr_timeout, RANAL_MIN_TIMEOUT); - return -EPROTO; - } - - return 0; -} - -int -kranal_close_stale_conns_locked (kra_peer_t *peer, kra_conn_t *newconn) -{ - kra_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int loopback; - int count = 0; - - loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid; - - list_for_each_safe (ctmp, cnxt, &peer->rap_conns) { - conn = list_entry(ctmp, kra_conn_t, rac_list); - - if (conn == newconn) - continue; - - if (conn->rac_peerstamp != newconn->rac_peerstamp) { - CDEBUG(D_NET, "Closing stale conn nid: %s " - " peerstamp:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->rap_nid), - conn->rac_peerstamp, newconn->rac_peerstamp); - LASSERT (conn->rac_peerstamp < newconn->rac_peerstamp); - count++; - kranal_close_conn_locked(conn, -ESTALE); - continue; - } - - if (conn->rac_device != newconn->rac_device) - continue; - - if (loopback && - newconn->rac_my_connstamp == conn->rac_peer_connstamp && - newconn->rac_peer_connstamp == conn->rac_my_connstamp) - continue; - - LASSERT (conn->rac_peer_connstamp < newconn->rac_peer_connstamp); - - CDEBUG(D_NET, "Closing stale conn nid: %s" - " connstamp:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->rap_nid), - conn->rac_peer_connstamp, newconn->rac_peer_connstamp); - - count++; - 
kranal_close_conn_locked(conn, -ESTALE); - } - - return count; -} - -int -kranal_conn_isdup_locked(kra_peer_t *peer, kra_conn_t *newconn) -{ - kra_conn_t *conn; - struct list_head *tmp; - int loopback; - - loopback = peer->rap_nid == kranal_data.kra_ni->ni_nid; - - list_for_each(tmp, &peer->rap_conns) { - conn = list_entry(tmp, kra_conn_t, rac_list); - - /* 'newconn' is from an earlier version of 'peer'!!! */ - if (newconn->rac_peerstamp < conn->rac_peerstamp) - return 1; - - /* 'conn' is from an earlier version of 'peer': it will be - * removed when we cull stale conns later on... */ - if (newconn->rac_peerstamp > conn->rac_peerstamp) - continue; - - /* Different devices are OK */ - if (conn->rac_device != newconn->rac_device) - continue; - - /* It's me connecting to myself */ - if (loopback && - newconn->rac_my_connstamp == conn->rac_peer_connstamp && - newconn->rac_peer_connstamp == conn->rac_my_connstamp) - continue; - - /* 'newconn' is an earlier connection from 'peer'!!! */ - if (newconn->rac_peer_connstamp < conn->rac_peer_connstamp) - return 2; - - /* 'conn' is an earlier connection from 'peer': it will be - * removed when we cull stale conns later on... */ - if (newconn->rac_peer_connstamp > conn->rac_peer_connstamp) - continue; - - /* 'newconn' has the SAME connection stamp; 'peer' isn't - * playing the game... 
*/ - return 3; - } - - return 0; -} - -void -kranal_set_conn_uniqueness (kra_conn_t *conn) -{ - unsigned long flags; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - conn->rac_my_connstamp = kranal_data.kra_connstamp++; - - do { /* allocate a unique cqid */ - conn->rac_cqid = kranal_data.kra_next_cqid++; - } while (kranal_cqid2conn_locked(conn->rac_cqid) != NULL); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); -} - -int -kranal_create_conn(kra_conn_t **connp, kra_device_t *dev) -{ - kra_conn_t *conn; - RAP_RETURN rrc; - - LASSERT (!in_interrupt()); - LIBCFS_ALLOC(conn, sizeof(*conn)); - - if (conn == NULL) - return -ENOMEM; - - memset(conn, 0, sizeof(*conn)); - atomic_set(&conn->rac_refcount, 1); - INIT_LIST_HEAD(&conn->rac_list); - INIT_LIST_HEAD(&conn->rac_hashlist); - INIT_LIST_HEAD(&conn->rac_schedlist); - INIT_LIST_HEAD(&conn->rac_fmaq); - INIT_LIST_HEAD(&conn->rac_rdmaq); - INIT_LIST_HEAD(&conn->rac_replyq); - spin_lock_init(&conn->rac_lock); - - kranal_set_conn_uniqueness(conn); - - conn->rac_device = dev; - conn->rac_timeout = MAX(*kranal_tunables.kra_timeout, RANAL_MIN_TIMEOUT); - kranal_update_reaper_timeout(conn->rac_timeout); - - rrc = RapkCreateRi(dev->rad_handle, conn->rac_cqid, - &conn->rac_rihandle); - if (rrc != RAP_SUCCESS) { - CERROR("RapkCreateRi failed: %d\n", rrc); - LIBCFS_FREE(conn, sizeof(*conn)); - return -ENETDOWN; - } - - atomic_inc(&kranal_data.kra_nconns); - *connp = conn; - return 0; -} - -void -kranal_destroy_conn(kra_conn_t *conn) -{ - RAP_RETURN rrc; - - LASSERT (!in_interrupt()); - LASSERT (!conn->rac_scheduled); - LASSERT (list_empty(&conn->rac_list)); - LASSERT (list_empty(&conn->rac_hashlist)); - LASSERT (list_empty(&conn->rac_schedlist)); - LASSERT (atomic_read(&conn->rac_refcount) == 0); - LASSERT (list_empty(&conn->rac_fmaq)); - LASSERT (list_empty(&conn->rac_rdmaq)); - LASSERT (list_empty(&conn->rac_replyq)); - - rrc = RapkDestroyRi(conn->rac_device->rad_handle, - conn->rac_rihandle); - 
LASSERT (rrc == RAP_SUCCESS); - - if (conn->rac_peer != NULL) - kranal_peer_decref(conn->rac_peer); - - LIBCFS_FREE(conn, sizeof(*conn)); - atomic_dec(&kranal_data.kra_nconns); -} - -void -kranal_terminate_conn_locked (kra_conn_t *conn) -{ - LASSERT (!in_interrupt()); - LASSERT (conn->rac_state == RANAL_CONN_CLOSING); - LASSERT (!list_empty(&conn->rac_hashlist)); - LASSERT (list_empty(&conn->rac_list)); - - /* Remove from conn hash table: no new callbacks */ - list_del_init(&conn->rac_hashlist); - kranal_conn_decref(conn); - - conn->rac_state = RANAL_CONN_CLOSED; - - /* schedule to clear out all uncompleted comms in context of dev's - * scheduler */ - kranal_schedule_conn(conn); -} - -void -kranal_close_conn_locked (kra_conn_t *conn, int error) -{ - kra_peer_t *peer = conn->rac_peer; - - CDEBUG(error == 0 ? D_NET : D_NETERROR, - "closing conn to %s: error %d\n", - libcfs_nid2str(peer->rap_nid), error); - - LASSERT (!in_interrupt()); - LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED); - LASSERT (!list_empty(&conn->rac_hashlist)); - LASSERT (!list_empty(&conn->rac_list)); - - list_del_init(&conn->rac_list); - - if (list_empty(&peer->rap_conns) && - peer->rap_persistence == 0) { - /* Non-persistent peer with no more conns... */ - kranal_unlink_peer_locked(peer); - } - - /* Reset RX timeout to ensure we wait for an incoming CLOSE for the - * full timeout. If we get a CLOSE we know the peer has stopped all - * RDMA. Otherwise if we wait for the full timeout we can also be sure - * all RDMA has stopped. 
*/ - conn->rac_last_rx = jiffies; - mb(); - - conn->rac_state = RANAL_CONN_CLOSING; - kranal_schedule_conn(conn); /* schedule sending CLOSE */ - - kranal_conn_decref(conn); /* lose peer's ref */ -} - -void -kranal_close_conn (kra_conn_t *conn, int error) -{ - unsigned long flags; - - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (conn->rac_state == RANAL_CONN_ESTABLISHED) - kranal_close_conn_locked(conn, error); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); -} - -int -kranal_set_conn_params(kra_conn_t *conn, kra_connreq_t *connreq, - __u32 peer_ip, int peer_port) -{ - kra_device_t *dev = conn->rac_device; - unsigned long flags; - RAP_RETURN rrc; - - /* CAVEAT EMPTOR: we're really overloading rac_last_tx + rac_keepalive - * to do RapkCompleteSync() timekeeping (see kibnal_scheduler). */ - conn->rac_last_tx = jiffies; - conn->rac_keepalive = 0; - - rrc = RapkSetRiParams(conn->rac_rihandle, &connreq->racr_riparams); - if (rrc != RAP_SUCCESS) { - CERROR("Error setting riparams from %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rrc); - return -ECONNABORTED; - } - - /* Schedule conn on rad_new_conns */ - kranal_conn_addref(conn); - spin_lock_irqsave(&dev->rad_lock, flags); - list_add_tail(&conn->rac_schedlist, &dev->rad_new_conns); - wake_up(&dev->rad_waitq); - spin_unlock_irqrestore(&dev->rad_lock, flags); - - rrc = RapkWaitToConnect(conn->rac_rihandle); - if (rrc != RAP_SUCCESS) { - CERROR("Error waiting to connect to %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rrc); - return -ECONNABORTED; - } - - /* Scheduler doesn't touch conn apart from to deschedule and decref it - * after RapkCompleteSync() return success, so conn is all mine */ - - conn->rac_peerstamp = connreq->racr_peerstamp; - conn->rac_peer_connstamp = connreq->racr_connstamp; - conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq->racr_timeout); - kranal_update_reaper_timeout(conn->rac_keepalive); - return 0; -} - -int -kranal_passive_conn_handshake 
(struct socket *sock, lnet_nid_t *src_nidp, - lnet_nid_t *dst_nidp, kra_conn_t **connp) -{ - __u32 peer_ip; - unsigned int peer_port; - kra_connreq_t rx_connreq; - kra_connreq_t tx_connreq; - kra_conn_t *conn; - kra_device_t *dev; - int rc; - int i; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - if (rc != 0) { - CERROR("Can't get peer's IP: %d\n", rc); - return rc; - } - - rc = kranal_recv_connreq(sock, &rx_connreq, 0); - - if (rc < 0) { - CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rc); - return rc; - } - - if (rc > 0) { - /* Request from "new" peer: send reply with my MAGIC/VERSION to - * tell her I'm old... */ - kranal_pack_connreq(&tx_connreq, NULL, LNET_NID_ANY); - - rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq), - lnet_acceptor_timeout()); - if (rc != 0) - CERROR("Can't tx stub connreq to %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rc); - - return -EPROTO; - } - - for (i = 0;;i++) { - if (i == kranal_data.kra_ndevs) { - CERROR("Can't match dev %d from %u.%u.%u.%u/%d\n", - rx_connreq.racr_devid, HIPQUAD(peer_ip), peer_port); - return -ENODEV; - } - dev = &kranal_data.kra_devices[i]; - if (dev->rad_id == rx_connreq.racr_devid) - break; - } - - rc = kranal_create_conn(&conn, dev); - if (rc != 0) - return rc; - - kranal_pack_connreq(&tx_connreq, conn, rx_connreq.racr_srcnid); - - rc = libcfs_sock_write(sock, &tx_connreq, sizeof(tx_connreq), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer_ip), peer_port, rc); - kranal_conn_decref(conn); - return rc; - } - - rc = kranal_set_conn_params(conn, &rx_connreq, peer_ip, peer_port); - if (rc != 0) { - kranal_conn_decref(conn); - return rc; - } - - *connp = conn; - *src_nidp = rx_connreq.racr_srcnid; - *dst_nidp = rx_connreq.racr_dstnid; - return 0; -} - -int -kranal_active_conn_handshake(kra_peer_t *peer, - lnet_nid_t *dst_nidp, kra_conn_t **connp) -{ - kra_connreq_t connreq; - 
kra_conn_t *conn; - kra_device_t *dev; - struct socket *sock; - int rc; - unsigned int idx; - - /* spread connections over all devices using both peer NIDs to ensure - * all nids use all devices */ - idx = peer->rap_nid + kranal_data.kra_ni->ni_nid; - dev = &kranal_data.kra_devices[idx % kranal_data.kra_ndevs]; - - rc = kranal_create_conn(&conn, dev); - if (rc != 0) - return rc; - - kranal_pack_connreq(&connreq, conn, peer->rap_nid); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto test */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - connreq.racr_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - connreq.racr_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - rc = lnet_connect(&sock, peer->rap_nid, - 0, peer->rap_ip, peer->rap_port); - if (rc != 0) - goto failed_0; - - /* CAVEAT EMPTOR: the passive side receives with a SHORT rx timeout - * immediately after accepting a connection, so we connect and then - * send immediately. 
*/ - - rc = libcfs_sock_write(sock, &connreq, sizeof(connreq), - lnet_acceptor_timeout()); - if (rc != 0) { - CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer->rap_ip), peer->rap_port, rc); - goto failed_2; - } - - rc = kranal_recv_connreq(sock, &connreq, 1); - if (rc != 0) { - CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n", - HIPQUAD(peer->rap_ip), peer->rap_port, rc); - goto failed_2; - } - - libcfs_sock_release(sock); - rc = -EPROTO; - - if (connreq.racr_srcnid != peer->rap_nid) { - CERROR("Unexpected srcnid from %u.%u.%u.%u/%d: " - "received %s expected %s\n", - HIPQUAD(peer->rap_ip), peer->rap_port, - libcfs_nid2str(connreq.racr_srcnid), - libcfs_nid2str(peer->rap_nid)); - goto failed_1; - } - - if (connreq.racr_devid != dev->rad_id) { - CERROR("Unexpected device id from %u.%u.%u.%u/%d: " - "received %d expected %d\n", - HIPQUAD(peer->rap_ip), peer->rap_port, - connreq.racr_devid, dev->rad_id); - goto failed_1; - } - - rc = kranal_set_conn_params(conn, &connreq, - peer->rap_ip, peer->rap_port); - if (rc != 0) - goto failed_1; - - *connp = conn; - *dst_nidp = connreq.racr_dstnid; - return 0; - - failed_2: - libcfs_sock_release(sock); - failed_1: - lnet_connect_console_error(rc, peer->rap_nid, - peer->rap_ip, peer->rap_port); - failed_0: - kranal_conn_decref(conn); - return rc; -} - -int -kranal_conn_handshake (struct socket *sock, kra_peer_t *peer) -{ - kra_peer_t *peer2; - kra_tx_t *tx; - lnet_nid_t peer_nid; - lnet_nid_t dst_nid; - unsigned long flags; - kra_conn_t *conn; - int rc; - int nstale; - int new_peer = 0; - - if (sock == NULL) { - /* active: connd wants to connect to 'peer' */ - LASSERT (peer != NULL); - LASSERT (peer->rap_connecting); - - rc = kranal_active_conn_handshake(peer, &dst_nid, &conn); - if (rc != 0) - return rc; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (!kranal_peer_active(peer)) { - /* raced with peer getting unlinked */ - write_unlock_irqrestore(&kranal_data.kra_global_lock, - flags); - 
kranal_conn_decref(conn); - return -ESTALE; - } - - peer_nid = peer->rap_nid; - } else { - /* passive: listener accepted 'sock' */ - LASSERT (peer == NULL); - - rc = kranal_passive_conn_handshake(sock, &peer_nid, - &dst_nid, &conn); - if (rc != 0) - return rc; - - /* assume this is a new peer */ - rc = kranal_create_peer(&peer, peer_nid); - if (rc != 0) { - CERROR("Can't create conn for %s\n", - libcfs_nid2str(peer_nid)); - kranal_conn_decref(conn); - return -ENOMEM; - } - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - peer2 = kranal_find_peer_locked(peer_nid); - if (peer2 == NULL) { - new_peer = 1; - } else { - /* peer_nid already in the peer table */ - kranal_peer_decref(peer); - peer = peer2; - } - } - - LASSERT ((!new_peer) != (!kranal_peer_active(peer))); - - /* Refuse connection if peer thinks we are a different NID. We check - * this while holding the global lock, to synch with connection - * destruction on NID change. */ - if (!lnet_ptlcompat_matchnid(kranal_data.kra_ni->ni_nid, dst_nid)) { - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - CERROR("Stale/bad connection with %s: dst_nid %s, expected %s\n", - libcfs_nid2str(peer_nid), libcfs_nid2str(dst_nid), - libcfs_nid2str(kranal_data.kra_ni->ni_nid)); - rc = -ESTALE; - goto failed; - } - - /* Refuse to duplicate an existing connection (both sides might try to - * connect at once). NB we return success! We _are_ connected so we - * _don't_ have any blocked txs to complete with failure. 
*/ - rc = kranal_conn_isdup_locked(peer, conn); - if (rc != 0) { - LASSERT (!list_empty(&peer->rap_conns)); - LASSERT (list_empty(&peer->rap_tx_queue)); - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - CWARN("Not creating duplicate connection to %s: %d\n", - libcfs_nid2str(peer_nid), rc); - rc = 0; - goto failed; - } - - if (new_peer) { - /* peer table takes my ref on the new peer */ - list_add_tail(&peer->rap_list, - kranal_nid2peerlist(peer_nid)); - } - - /* initialise timestamps before reaper looks at them */ - conn->rac_last_tx = conn->rac_last_rx = jiffies; - - kranal_peer_addref(peer); /* +1 ref for conn */ - conn->rac_peer = peer; - list_add_tail(&conn->rac_list, &peer->rap_conns); - - kranal_conn_addref(conn); /* +1 ref for conn table */ - list_add_tail(&conn->rac_hashlist, - kranal_cqid2connlist(conn->rac_cqid)); - - /* Schedule all packets blocking for a connection */ - while (!list_empty(&peer->rap_tx_queue)) { - tx = list_entry(peer->rap_tx_queue.next, - kra_tx_t, tx_list); - - list_del(&tx->tx_list); - kranal_post_fma(conn, tx); - } - - nstale = kranal_close_stale_conns_locked(peer, conn); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - /* CAVEAT EMPTOR: passive peer can disappear NOW */ - - if (nstale != 0) - CWARN("Closed %d stale conns to %s\n", nstale, - libcfs_nid2str(peer_nid)); - - CWARN("New connection to %s on devid[%d] = %d\n", - libcfs_nid2str(peer_nid), - conn->rac_device->rad_idx, conn->rac_device->rad_id); - - /* Ensure conn gets checked. 
Transmits may have been queued and an - * FMA event may have happened before it got in the cq hash table */ - kranal_schedule_conn(conn); - return 0; - - failed: - if (new_peer) - kranal_peer_decref(peer); - kranal_conn_decref(conn); - return rc; -} - -void -kranal_connect (kra_peer_t *peer) -{ - kra_tx_t *tx; - unsigned long flags; - struct list_head zombies; - int rc; - - LASSERT (peer->rap_connecting); - - CDEBUG(D_NET, "About to handshake %s\n", - libcfs_nid2str(peer->rap_nid)); - - rc = kranal_conn_handshake(NULL, peer); - - CDEBUG(D_NET, "Done handshake %s:%d \n", - libcfs_nid2str(peer->rap_nid), rc); - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - LASSERT (peer->rap_connecting); - peer->rap_connecting = 0; - - if (rc == 0) { - /* kranal_conn_handshake() queues blocked txs immediately on - * success to avoid messages jumping the queue */ - LASSERT (list_empty(&peer->rap_tx_queue)); - - peer->rap_reconnect_interval = 0; /* OK to reconnect at any time */ - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - return; - } - - peer->rap_reconnect_interval *= 2; - peer->rap_reconnect_interval = - MAX(peer->rap_reconnect_interval, - *kranal_tunables.kra_min_reconnect_interval); - peer->rap_reconnect_interval = - MIN(peer->rap_reconnect_interval, - *kranal_tunables.kra_max_reconnect_interval); - - peer->rap_reconnect_time = jiffies + peer->rap_reconnect_interval * HZ; - - /* Grab all blocked packets while we have the global lock */ - list_add(&zombies, &peer->rap_tx_queue); - list_del_init(&peer->rap_tx_queue); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - if (list_empty(&zombies)) - return; - - CDEBUG(D_NETERROR, "Dropping packets for %s: connection failed\n", - libcfs_nid2str(peer->rap_nid)); - - do { - tx = list_entry(zombies.next, kra_tx_t, tx_list); - - list_del(&tx->tx_list); - kranal_tx_done(tx, -EHOSTUNREACH); - - } while (!list_empty(&zombies)); -} - -void -kranal_free_acceptsock (kra_acceptsock_t *ras) 
-{ - libcfs_sock_release(ras->ras_sock); - LIBCFS_FREE(ras, sizeof(*ras)); -} - -int -kranal_accept (lnet_ni_t *ni, struct socket *sock) -{ - kra_acceptsock_t *ras; - int rc; - __u32 peer_ip; - int peer_port; - unsigned long flags; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT (rc == 0); /* we succeeded before */ - - LIBCFS_ALLOC(ras, sizeof(*ras)); - if (ras == NULL) { - CERROR("ENOMEM allocating connection request from " - "%u.%u.%u.%u\n", HIPQUAD(peer_ip)); - return -ENOMEM; - } - - ras->ras_sock = sock; - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - - list_add_tail(&ras->ras_list, &kranal_data.kra_connd_acceptq); - wake_up(&kranal_data.kra_connd_waitq); - - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - return 0; -} - -int -kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid) -{ - kra_peer_t *peer; - unsigned long flags; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, sizeof(*peer)); - if (peer == NULL) - return -ENOMEM; - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->rap_nid = nid; - atomic_set(&peer->rap_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD(&peer->rap_list); - INIT_LIST_HEAD(&peer->rap_connd_list); - INIT_LIST_HEAD(&peer->rap_conns); - INIT_LIST_HEAD(&peer->rap_tx_queue); - - peer->rap_reconnect_interval = 0; /* OK to connect at any time */ - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (kranal_data.kra_nonewpeers) { - /* shutdown has started already */ - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - LIBCFS_FREE(peer, sizeof(*peer)); - CERROR("Can't create peer: network shutdown\n"); - return -ESHUTDOWN; - } - - atomic_inc(&kranal_data.kra_npeers); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - *peerp = peer; - return 0; -} - -void -kranal_destroy_peer (kra_peer_t *peer) -{ - CDEBUG(D_NET, "peer %s %p deleted\n", - libcfs_nid2str(peer->rap_nid), peer); - - LASSERT 
(atomic_read(&peer->rap_refcount) == 0); - LASSERT (peer->rap_persistence == 0); - LASSERT (!kranal_peer_active(peer)); - LASSERT (!peer->rap_connecting); - LASSERT (list_empty(&peer->rap_conns)); - LASSERT (list_empty(&peer->rap_tx_queue)); - LASSERT (list_empty(&peer->rap_connd_list)); - - LIBCFS_FREE(peer, sizeof(*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. */ - atomic_dec(&kranal_data.kra_npeers); -} - -kra_peer_t * -kranal_find_peer_locked (lnet_nid_t nid) -{ - struct list_head *peer_list = kranal_nid2peerlist(nid); - struct list_head *tmp; - kra_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry(tmp, kra_peer_t, rap_list); - - LASSERT (peer->rap_persistence > 0 || /* persistent peer */ - !list_empty(&peer->rap_conns)); /* active conn */ - - if (peer->rap_nid != nid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_nid2str(nid), - atomic_read(&peer->rap_refcount)); - return peer; - } - return NULL; -} - -kra_peer_t * -kranal_find_peer (lnet_nid_t nid) -{ - kra_peer_t *peer; - - read_lock(&kranal_data.kra_global_lock); - peer = kranal_find_peer_locked(nid); - if (peer != NULL) /* +1 ref for caller? 
*/ - kranal_peer_addref(peer); - read_unlock(&kranal_data.kra_global_lock); - - return peer; -} - -void -kranal_unlink_peer_locked (kra_peer_t *peer) -{ - LASSERT (peer->rap_persistence == 0); - LASSERT (list_empty(&peer->rap_conns)); - - LASSERT (kranal_peer_active(peer)); - list_del_init(&peer->rap_list); - - /* lose peerlist's ref */ - kranal_peer_decref(peer); -} - -int -kranal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, int *portp, - int *persistencep) -{ - kra_peer_t *peer; - struct list_head *ptmp; - int i; - - read_lock(&kranal_data.kra_global_lock); - - for (i = 0; i < kranal_data.kra_peer_hash_size; i++) { - - list_for_each(ptmp, &kranal_data.kra_peers[i]) { - - peer = list_entry(ptmp, kra_peer_t, rap_list); - LASSERT (peer->rap_persistence > 0 || - !list_empty(&peer->rap_conns)); - - if (index-- > 0) - continue; - - *nidp = peer->rap_nid; - *ipp = peer->rap_ip; - *portp = peer->rap_port; - *persistencep = peer->rap_persistence; - - read_unlock(&kranal_data.kra_global_lock); - return 0; - } - } - - read_unlock(&kranal_data.kra_global_lock); - return -ENOENT; -} - -int -kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port) -{ - unsigned long flags; - kra_peer_t *peer; - kra_peer_t *peer2; - int rc; - - if (nid == LNET_NID_ANY) - return -EINVAL; - - rc = kranal_create_peer(&peer, nid); - if (rc != 0) - return rc; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - peer2 = kranal_find_peer_locked(nid); - if (peer2 != NULL) { - kranal_peer_decref(peer); - peer = peer2; - } else { - /* peer table takes existing ref on peer */ - list_add_tail(&peer->rap_list, - kranal_nid2peerlist(nid)); - } - - peer->rap_ip = ip; - peer->rap_port = port; - peer->rap_persistence++; - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - return 0; -} - -void -kranal_del_peer_locked (kra_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kra_conn_t *conn; - - peer->rap_persistence = 0; - - if 
(list_empty(&peer->rap_conns)) { - kranal_unlink_peer_locked(peer); - } else { - list_for_each_safe(ctmp, cnxt, &peer->rap_conns) { - conn = list_entry(ctmp, kra_conn_t, rac_list); - - kranal_close_conn_locked(conn, 0); - } - /* peer unlinks itself when last conn is closed */ - } -} - -int -kranal_del_peer (lnet_nid_t nid) -{ - unsigned long flags; - struct list_head *ptmp; - struct list_head *pnxt; - kra_peer_t *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kranal_nid2peerlist(nid) - kranal_data.kra_peers; - else { - lo = 0; - hi = kranal_data.kra_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kranal_data.kra_peers[i]) { - peer = list_entry(ptmp, kra_peer_t, rap_list); - LASSERT (peer->rap_persistence > 0 || - !list_empty(&peer->rap_conns)); - - if (!(nid == LNET_NID_ANY || peer->rap_nid == nid)) - continue; - - kranal_del_peer_locked(peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - return rc; -} - -kra_conn_t * -kranal_get_conn_by_idx (int index) -{ - kra_peer_t *peer; - struct list_head *ptmp; - kra_conn_t *conn; - struct list_head *ctmp; - int i; - - read_lock (&kranal_data.kra_global_lock); - - for (i = 0; i < kranal_data.kra_peer_hash_size; i++) { - list_for_each (ptmp, &kranal_data.kra_peers[i]) { - - peer = list_entry(ptmp, kra_peer_t, rap_list); - LASSERT (peer->rap_persistence > 0 || - !list_empty(&peer->rap_conns)); - - list_for_each (ctmp, &peer->rap_conns) { - if (index-- > 0) - continue; - - conn = list_entry(ctmp, kra_conn_t, rac_list); - CDEBUG(D_NET, "++conn[%p] -> %s (%d)\n", conn, - libcfs_nid2str(conn->rac_peer->rap_nid), - atomic_read(&conn->rac_refcount)); - atomic_inc(&conn->rac_refcount); - read_unlock(&kranal_data.kra_global_lock); - return conn; - } - } - } - - read_unlock(&kranal_data.kra_global_lock); - return NULL; 
-} - -int -kranal_close_peer_conns_locked (kra_peer_t *peer, int why) -{ - kra_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->rap_conns) { - conn = list_entry(ctmp, kra_conn_t, rac_list); - - count++; - kranal_close_conn_locked(conn, why); - } - - return count; -} - -int -kranal_close_matching_conns (lnet_nid_t nid) -{ - unsigned long flags; - kra_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kranal_nid2peerlist(nid) - kranal_data.kra_peers; - else { - lo = 0; - hi = kranal_data.kra_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kranal_data.kra_peers[i]) { - - peer = list_entry(ptmp, kra_peer_t, rap_list); - LASSERT (peer->rap_persistence > 0 || - !list_empty(&peer->rap_conns)); - - if (!(nid == LNET_NID_ANY || nid == peer->rap_nid)) - continue; - - count += kranal_close_peer_conns_locked(peer, 0); - } - } - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return 0; - - return (count == 0) ? 
-ENOENT : 0; -} - -int -kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - LASSERT (ni == kranal_data.kra_ni); - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - __u32 ip = 0; - int port = 0; - int share_count = 0; - - rc = kranal_get_peer_info(data->ioc_count, - &nid, &ip, &port, &share_count); - data->ioc_nid = nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - break; - } - case IOC_LIBCFS_ADD_PEER: { - rc = kranal_add_persistent_peer(data->ioc_nid, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ - break; - } - case IOC_LIBCFS_DEL_PEER: { - rc = kranal_del_peer(data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kra_conn_t *conn = kranal_get_conn_by_idx(data->ioc_count); - - if (conn == NULL) - rc = -ENOENT; - else { - rc = 0; - data->ioc_nid = conn->rac_peer->rap_nid; - data->ioc_u32[0] = conn->rac_device->rad_id; - kranal_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kranal_close_matching_conns(data->ioc_nid); - break; - } - case IOC_LIBCFS_REGISTER_MYNID: { - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) { - rc = 0; - } else { - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - rc = -EINVAL; - } - break; - } - } - - return rc; -} - -void -kranal_free_txdescs(struct list_head *freelist) -{ - kra_tx_t *tx; - - while (!list_empty(freelist)) { - tx = list_entry(freelist->next, kra_tx_t, tx_list); - - list_del(&tx->tx_list); - LIBCFS_FREE(tx->tx_phys, LNET_MAX_IOV * sizeof(*tx->tx_phys)); - LIBCFS_FREE(tx, sizeof(*tx)); - } -} - -int -kranal_alloc_txdescs(struct list_head *freelist, int n) -{ - int i; - kra_tx_t *tx; - - LASSERT (freelist == &kranal_data.kra_idle_txs); - LASSERT (list_empty(freelist)); - - for (i = 0; i < n; i++) { - - LIBCFS_ALLOC(tx, sizeof(*tx)); - if (tx == NULL) { - 
CERROR("Can't allocate tx[%d]\n", i); - kranal_free_txdescs(freelist); - return -ENOMEM; - } - - LIBCFS_ALLOC(tx->tx_phys, - LNET_MAX_IOV * sizeof(*tx->tx_phys)); - if (tx->tx_phys == NULL) { - CERROR("Can't allocate tx[%d]->tx_phys\n", i); - - LIBCFS_FREE(tx, sizeof(*tx)); - kranal_free_txdescs(freelist); - return -ENOMEM; - } - - tx->tx_buftype = RANAL_BUF_NONE; - tx->tx_msg.ram_type = RANAL_MSG_NONE; - - list_add(&tx->tx_list, freelist); - } - - return 0; -} - -int -kranal_device_init(int id, kra_device_t *dev) -{ - int total_ntx = *kranal_tunables.kra_ntx; - RAP_RETURN rrc; - - dev->rad_id = id; - rrc = RapkGetDeviceByIndex(id, kranal_device_callback, - &dev->rad_handle); - if (rrc != RAP_SUCCESS) { - CERROR("Can't get Rapidarray Device %d: %d\n", id, rrc); - goto failed_0; - } - - rrc = RapkReserveRdma(dev->rad_handle, total_ntx); - if (rrc != RAP_SUCCESS) { - CERROR("Can't reserve %d RDMA descriptors" - " for device %d: %d\n", total_ntx, id, rrc); - goto failed_1; - } - - rrc = RapkCreateCQ(dev->rad_handle, total_ntx, RAP_CQTYPE_SEND, - &dev->rad_rdma_cqh); - if (rrc != RAP_SUCCESS) { - CERROR("Can't create rdma cq size %d for device %d: %d\n", - total_ntx, id, rrc); - goto failed_1; - } - - rrc = RapkCreateCQ(dev->rad_handle, - *kranal_tunables.kra_fma_cq_size, - RAP_CQTYPE_RECV, &dev->rad_fma_cqh); - if (rrc != RAP_SUCCESS) { - CERROR("Can't create fma cq size %d for device %d: %d\n", - *kranal_tunables.kra_fma_cq_size, id, rrc); - goto failed_2; - } - - return 0; - - failed_2: - RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh); - failed_1: - RapkReleaseDevice(dev->rad_handle); - failed_0: - return -ENODEV; -} - -void -kranal_device_fini(kra_device_t *dev) -{ - LASSERT (list_empty(&dev->rad_ready_conns)); - LASSERT (list_empty(&dev->rad_new_conns)); - LASSERT (dev->rad_nphysmap == 0); - LASSERT (dev->rad_nppphysmap == 0); - LASSERT (dev->rad_nvirtmap == 0); - LASSERT (dev->rad_nobvirtmap == 0); - - LASSERT(dev->rad_scheduler == NULL); - 
RapkDestroyCQ(dev->rad_handle, dev->rad_fma_cqh); - RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cqh); - RapkReleaseDevice(dev->rad_handle); -} - -void -kranal_shutdown (lnet_ni_t *ni) -{ - int i; - unsigned long flags; - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - LASSERT (ni == kranal_data.kra_ni); - LASSERT (ni->ni_data == &kranal_data); - - switch (kranal_data.kra_init) { - default: - CERROR("Unexpected state %d\n", kranal_data.kra_init); - LBUG(); - - case RANAL_INIT_ALL: - /* Prevent new peers from being created */ - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - kranal_data.kra_nonewpeers = 1; - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - /* Remove all existing peers from the peer table */ - kranal_del_peer(LNET_NID_ANY); - - /* Wait for pending conn reqs to be handled */ - i = 2; - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - while (!list_empty(&kranal_data.kra_connd_acceptq)) { - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, - flags); - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */ - "waiting for conn reqs to clean up\n"); - cfs_pause(cfs_time_seconds(1)); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - } - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - /* Wait for all peers to be freed */ - i = 2; - while (atomic_read(&kranal_data.kra_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n */ - "waiting for %d peers to close down\n", - atomic_read(&kranal_data.kra_npeers)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case RANAL_INIT_DATA: - break; - } - - /* Peer state all cleaned up BEFORE setting shutdown, so threads don't - * have to worry about shutdown races. NB connections may be created - * while there are still active connds, but these will be temporary - * since peer creation always fails after the listener has started to - * shut down. 
*/ - LASSERT (atomic_read(&kranal_data.kra_npeers) == 0); - - /* Flag threads to terminate */ - kranal_data.kra_shutdown = 1; - - for (i = 0; i < kranal_data.kra_ndevs; i++) { - kra_device_t *dev = &kranal_data.kra_devices[i]; - - spin_lock_irqsave(&dev->rad_lock, flags); - wake_up(&dev->rad_waitq); - spin_unlock_irqrestore(&dev->rad_lock, flags); - } - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - wake_up_all(&kranal_data.kra_reaper_waitq); - spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags); - - LASSERT (list_empty(&kranal_data.kra_connd_peers)); - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - wake_up_all(&kranal_data.kra_connd_waitq); - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - /* Wait for threads to exit */ - i = 2; - while (atomic_read(&kranal_data.kra_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "Waiting for %d threads to terminate\n", - atomic_read(&kranal_data.kra_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - LASSERT (atomic_read(&kranal_data.kra_npeers) == 0); - if (kranal_data.kra_peers != NULL) { - for (i = 0; i < kranal_data.kra_peer_hash_size; i++) - LASSERT (list_empty(&kranal_data.kra_peers[i])); - - LIBCFS_FREE(kranal_data.kra_peers, - sizeof (struct list_head) * - kranal_data.kra_peer_hash_size); - } - - LASSERT (atomic_read(&kranal_data.kra_nconns) == 0); - if (kranal_data.kra_conns != NULL) { - for (i = 0; i < kranal_data.kra_conn_hash_size; i++) - LASSERT (list_empty(&kranal_data.kra_conns[i])); - - LIBCFS_FREE(kranal_data.kra_conns, - sizeof (struct list_head) * - kranal_data.kra_conn_hash_size); - } - - for (i = 0; i < kranal_data.kra_ndevs; i++) - kranal_device_fini(&kranal_data.kra_devices[i]); - - kranal_free_txdescs(&kranal_data.kra_idle_txs); - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - kranal_data.kra_init = RANAL_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -int -kranal_startup 
(lnet_ni_t *ni) -{ - struct timeval tv; - int pkmem = atomic_read(&libcfs_kmemory); - int rc; - int i; - kra_device_t *dev; - - LASSERT (ni->ni_lnd == &the_kralnd); - - /* Only 1 instance supported */ - if (kranal_data.kra_init != RANAL_INIT_NOTHING) { - CERROR ("Only 1 instance supported\n"); - return -EPERM; - } - - if (lnet_set_ip_niaddr(ni) != 0) { - CERROR ("Can't determine my NID\n"); - return -EPERM; - } - - if (*kranal_tunables.kra_credits > *kranal_tunables.kra_ntx) { - CERROR ("Can't set credits(%d) > ntx(%d)\n", - *kranal_tunables.kra_credits, - *kranal_tunables.kra_ntx); - return -EINVAL; - } - - memset(&kranal_data, 0, sizeof(kranal_data)); /* zero pointers, flags etc */ - - ni->ni_maxtxcredits = *kranal_tunables.kra_credits; - ni->ni_peertxcredits = *kranal_tunables.kra_peercredits; - - ni->ni_data = &kranal_data; - kranal_data.kra_ni = ni; - - /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and - * a unique (for all time) connstamp so we can uniquely identify - * the sender. The connstamp is an incrementing counter - * initialised with seconds + microseconds at startup time. So we - * rely on NOT creating connections more frequently on average than - * 1MHz to ensure we don't use old connstamps when we reboot. 
*/ - do_gettimeofday(&tv); - kranal_data.kra_connstamp = - kranal_data.kra_peerstamp = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - rwlock_init(&kranal_data.kra_global_lock); - - for (i = 0; i < RANAL_MAXDEVS; i++ ) { - kra_device_t *dev = &kranal_data.kra_devices[i]; - - dev->rad_idx = i; - INIT_LIST_HEAD(&dev->rad_ready_conns); - INIT_LIST_HEAD(&dev->rad_new_conns); - init_waitqueue_head(&dev->rad_waitq); - spin_lock_init(&dev->rad_lock); - } - - kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT; - init_waitqueue_head(&kranal_data.kra_reaper_waitq); - spin_lock_init(&kranal_data.kra_reaper_lock); - - INIT_LIST_HEAD(&kranal_data.kra_connd_acceptq); - INIT_LIST_HEAD(&kranal_data.kra_connd_peers); - init_waitqueue_head(&kranal_data.kra_connd_waitq); - spin_lock_init(&kranal_data.kra_connd_lock); - - INIT_LIST_HEAD(&kranal_data.kra_idle_txs); - spin_lock_init(&kranal_data.kra_tx_lock); - - /* OK to call kranal_api_shutdown() to cleanup now */ - kranal_data.kra_init = RANAL_INIT_DATA; - PORTAL_MODULE_USE; - - kranal_data.kra_peer_hash_size = RANAL_PEER_HASH_SIZE; - LIBCFS_ALLOC(kranal_data.kra_peers, - sizeof(struct list_head) * kranal_data.kra_peer_hash_size); - if (kranal_data.kra_peers == NULL) - goto failed; - - for (i = 0; i < kranal_data.kra_peer_hash_size; i++) - INIT_LIST_HEAD(&kranal_data.kra_peers[i]); - - kranal_data.kra_conn_hash_size = RANAL_PEER_HASH_SIZE; - LIBCFS_ALLOC(kranal_data.kra_conns, - sizeof(struct list_head) * kranal_data.kra_conn_hash_size); - if (kranal_data.kra_conns == NULL) - goto failed; - - for (i = 0; i < kranal_data.kra_conn_hash_size; i++) - INIT_LIST_HEAD(&kranal_data.kra_conns[i]); - - rc = kranal_alloc_txdescs(&kranal_data.kra_idle_txs, - *kranal_tunables.kra_ntx); - if (rc != 0) - goto failed; - - rc = kranal_thread_start(kranal_reaper, NULL); - if (rc != 0) { - CERROR("Can't spawn ranal reaper: %d\n", rc); - goto failed; - } - - for (i = 0; i < *kranal_tunables.kra_n_connd; i++) { - rc = 
kranal_thread_start(kranal_connd, (void *)(unsigned long)i); - if (rc != 0) { - CERROR("Can't spawn ranal connd[%d]: %d\n", - i, rc); - goto failed; - } - } - - LASSERT (kranal_data.kra_ndevs == 0); - - /* Use all available RapidArray devices */ - for (i = 0; i < RANAL_MAXDEVS; i++) { - dev = &kranal_data.kra_devices[kranal_data.kra_ndevs]; - - rc = kranal_device_init(kranal_devids[i], dev); - if (rc == 0) - kranal_data.kra_ndevs++; - } - - if (kranal_data.kra_ndevs == 0) { - CERROR("Can't initialise any RapidArray devices\n"); - goto failed; - } - - for (i = 0; i < kranal_data.kra_ndevs; i++) { - dev = &kranal_data.kra_devices[i]; - rc = kranal_thread_start(kranal_scheduler, dev); - if (rc != 0) { - CERROR("Can't spawn ranal scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - /* flag everything initialised */ - kranal_data.kra_init = RANAL_INIT_ALL; - /*****************************************************/ - - CDEBUG(D_MALLOC, "initial kmem %d\n", pkmem); - return 0; - - failed: - kranal_shutdown(ni); - return -ENETDOWN; -} - -void __exit -kranal_module_fini (void) -{ - lnet_unregister_lnd(&the_kralnd); - kranal_tunables_fini(); -} - -int __init -kranal_module_init (void) -{ - int rc; - - rc = kranal_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kralnd); - - return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel RapidArray LND v0.01"); -MODULE_LICENSE("GPL"); - -module_init(kranal_module_init); -module_exit(kranal_module_fini); diff --git a/lnet/klnds/ralnd/ralnd.h b/lnet/klnds/ralnd/ralnd.h deleted file mode 100644 index 300cf40b92c725351d3d8cf4df1b9e8893a0ec96..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/ralnd.h +++ /dev/null @@ -1,455 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. 
- * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> - -#include <net/sock.h> -#include <linux/in.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -#include <rapl.h> - -/* tunables determined at compile time */ -#define RANAL_RESCHED 100 /* # scheduler loops before reschedule */ - -#define RANAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define RANAL_CONN_HASH_SIZE 101 /* # conn lists */ - -#define RANAL_MIN_TIMEOUT 5 /* minimum timeout interval (seconds) */ -#define RANAL_TIMEOUT2KEEPALIVE(t) (((t)+1)/2) /* timeout -> keepalive interval */ - -/* fixed constants */ -#define RANAL_MAXDEVS 2 /* max # devices RapidArray supports */ -#define 
RANAL_FMA_MAX_PREFIX 232 /* max bytes in FMA "Prefix" we can use */ -#define RANAL_FMA_MAX_DATA ((7<<10)-256) /* Max FMA MSG is 7K including prefix */ - - -typedef struct -{ - int *kra_n_connd; /* # connection daemons */ - int *kra_min_reconnect_interval; /* first failed connection retry... */ - int *kra_max_reconnect_interval; /* ...exponentially increasing to this */ - int *kra_ntx; /* # tx descs */ - int *kra_credits; /* # concurrent sends */ - int *kra_peercredits; /* # concurrent sends to 1 peer */ - int *kra_fma_cq_size; /* # entries in receive CQ */ - int *kra_timeout; /* comms timeout (seconds) */ - int *kra_max_immediate; /* immediate payload breakpoint */ - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *kra_sysctl; /* sysctl interface */ -#endif -} kra_tunables_t; - -typedef struct -{ - RAP_PVOID rad_handle; /* device handle */ - RAP_PVOID rad_fma_cqh; /* FMA completion queue handle */ - RAP_PVOID rad_rdma_cqh; /* rdma completion queue handle */ - int rad_id; /* device id */ - int rad_idx; /* index in kra_devices */ - int rad_ready; /* set by device callback */ - struct list_head rad_ready_conns;/* connections ready to tx/rx */ - struct list_head rad_new_conns; /* new connections to complete */ - wait_queue_head_t rad_waitq; /* scheduler waits here */ - spinlock_t rad_lock; /* serialise */ - void *rad_scheduler; /* scheduling thread */ - unsigned int rad_nphysmap; /* # phys mappings */ - unsigned int rad_nppphysmap; /* # phys pages mapped */ - unsigned int rad_nvirtmap; /* # virt mappings */ - unsigned long rad_nobvirtmap; /* # virt bytes mapped */ -} kra_device_t; - -typedef struct -{ - int kra_init; /* initialisation state */ - int kra_shutdown; /* shut down? 
*/ - atomic_t kra_nthreads; /* # live threads */ - lnet_ni_t *kra_ni; /* _the_ nal instance */ - - kra_device_t kra_devices[RANAL_MAXDEVS]; /* device/ptag/cq etc */ - int kra_ndevs; /* # devices */ - - rwlock_t kra_global_lock; /* stabilize peer/conn ops */ - - struct list_head *kra_peers; /* hash table of all my known peers */ - int kra_peer_hash_size; /* size of kra_peers */ - atomic_t kra_npeers; /* # peers extant */ - int kra_nonewpeers; /* prevent new peers */ - - struct list_head *kra_conns; /* conns hashed by cqid */ - int kra_conn_hash_size; /* size of kra_conns */ - __u64 kra_peerstamp; /* when I started up */ - __u64 kra_connstamp; /* conn stamp generator */ - int kra_next_cqid; /* cqid generator */ - atomic_t kra_nconns; /* # connections extant */ - - long kra_new_min_timeout; /* minimum timeout on any new conn */ - wait_queue_head_t kra_reaper_waitq; /* reaper sleeps here */ - spinlock_t kra_reaper_lock; /* serialise */ - - struct list_head kra_connd_peers; /* peers waiting for a connection */ - struct list_head kra_connd_acceptq; /* accepted sockets to handshake */ - wait_queue_head_t kra_connd_waitq; /* connection daemons sleep here */ - spinlock_t kra_connd_lock; /* serialise */ - - struct list_head kra_idle_txs; /* idle tx descriptors */ - __u64 kra_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t kra_tx_lock; /* serialise */ -} kra_data_t; - -#define RANAL_INIT_NOTHING 0 -#define RANAL_INIT_DATA 1 -#define RANAL_INIT_ALL 2 - -typedef struct kra_acceptsock /* accepted socket queued for connd */ -{ - struct list_head ras_list; /* queue for attention */ - struct socket *ras_sock; /* the accepted socket */ -} kra_acceptsock_t; - -/************************************************************************ - * Wire message structs. These are sent in sender's byte order - * (i.e. receiver checks magic and flips if required). 
- */ - -typedef struct kra_connreq /* connection request/response */ -{ /* (sent via socket) */ - __u32 racr_magic; /* I'm an ranal connreq */ - __u16 racr_version; /* this is my version number */ - __u16 racr_devid; /* sender's device ID */ - __u64 racr_srcnid; /* sender's NID */ - __u64 racr_dstnid; /* who sender expects to listen */ - __u64 racr_peerstamp; /* sender's instance stamp */ - __u64 racr_connstamp; /* sender's connection stamp */ - __u32 racr_timeout; /* sender's timeout */ - RAP_RI_PARAMETERS racr_riparams; /* sender's endpoint info */ -} kra_connreq_t; - -typedef struct -{ - RAP_MEM_KEY rard_key; - RAP_PVOID64 rard_addr; - RAP_UINT32 rard_nob; -} kra_rdma_desc_t; - -typedef struct -{ - lnet_hdr_t raim_hdr; /* portals header */ - /* Portals payload is in FMA "Message Data" */ -} kra_immediate_msg_t; - -typedef struct -{ - lnet_hdr_t raprm_hdr; /* portals header */ - __u64 raprm_cookie; /* opaque completion cookie */ -} kra_putreq_msg_t; - -typedef struct -{ - __u64 rapam_src_cookie; /* reflected completion cookie */ - __u64 rapam_dst_cookie; /* opaque completion cookie */ - kra_rdma_desc_t rapam_desc; /* sender's sink buffer */ -} kra_putack_msg_t; - -typedef struct -{ - lnet_hdr_t ragm_hdr; /* portals header */ - __u64 ragm_cookie; /* opaque completion cookie */ - kra_rdma_desc_t ragm_desc; /* sender's sink buffer */ -} kra_get_msg_t; - -typedef struct -{ - __u64 racm_cookie; /* reflected completion cookie */ -} kra_completion_msg_t; - -typedef struct /* NB must fit in FMA "Prefix" */ -{ - __u32 ram_magic; /* I'm an ranal message */ - __u16 ram_version; /* this is my version number */ - __u16 ram_type; /* msg type */ - __u64 ram_srcnid; /* sender's NID */ - __u64 ram_connstamp; /* sender's connection stamp */ - union { - kra_immediate_msg_t immediate; - kra_putreq_msg_t putreq; - kra_putack_msg_t putack; - kra_get_msg_t get; - kra_completion_msg_t completion; - } ram_u; - __u32 ram_seq; /* incrementing sequence number */ -} kra_msg_t; - -#define 
RANAL_MSG_MAGIC LNET_PROTO_RA_MAGIC /* unique magic */ -#define RANAL_MSG_VERSION 1 /* current protocol version */ - -#define RANAL_MSG_FENCE 0x80 /* fence RDMA */ - -#define RANAL_MSG_NONE 0x00 /* illegal message */ -#define RANAL_MSG_NOOP 0x01 /* empty ram_u (keepalive) */ -#define RANAL_MSG_IMMEDIATE 0x02 /* ram_u.immediate */ -#define RANAL_MSG_PUT_REQ 0x03 /* ram_u.putreq (src->sink) */ -#define RANAL_MSG_PUT_NAK 0x04 /* ram_u.completion (no PUT match: sink->src) */ -#define RANAL_MSG_PUT_ACK 0x05 /* ram_u.putack (PUT matched: sink->src) */ -#define RANAL_MSG_PUT_DONE 0x86 /* ram_u.completion (src->sink) */ -#define RANAL_MSG_GET_REQ 0x07 /* ram_u.get (sink->src) */ -#define RANAL_MSG_GET_NAK 0x08 /* ram_u.completion (no GET match: src->sink) */ -#define RANAL_MSG_GET_DONE 0x89 /* ram_u.completion (src->sink) */ -#define RANAL_MSG_CLOSE 0x8a /* empty ram_u */ - -/***********************************************************************/ - -typedef struct kra_tx /* message descriptor */ -{ - struct list_head tx_list; /* queue on idle_txs/rac_sendq/rac_waitq */ - struct kra_conn *tx_conn; /* owning conn */ - lnet_msg_t *tx_lntmsg[2]; /* ptl msgs to finalize on completion */ - unsigned long tx_qtime; /* when tx started to wait for something (jiffies) */ - int tx_nob; /* # bytes of payload */ - int tx_buftype; /* payload buffer type */ - void *tx_buffer; /* source/sink buffer */ - int tx_phys_offset; /* first page offset (if phys) */ - int tx_phys_npages; /* # physical pages */ - RAP_PHYS_REGION *tx_phys; /* page descriptors */ - RAP_MEM_KEY tx_map_key; /* mapping key */ - RAP_RDMA_DESCRIPTOR tx_rdma_desc; /* rdma descriptor */ - __u64 tx_cookie; /* identify this tx to peer */ - kra_msg_t tx_msg; /* FMA message buffer */ -} kra_tx_t; - -#define RANAL_BUF_NONE 0 /* buffer type not set */ -#define RANAL_BUF_IMMEDIATE 1 /* immediate data */ -#define RANAL_BUF_PHYS_UNMAPPED 2 /* physical: not mapped yet */ -#define RANAL_BUF_PHYS_MAPPED 3 /* physical: mapped already */ 
-#define RANAL_BUF_VIRT_UNMAPPED 4 /* virtual: not mapped yet */ -#define RANAL_BUF_VIRT_MAPPED 5 /* virtual: mapped already */ - -typedef struct kra_conn -{ - struct kra_peer *rac_peer; /* owning peer */ - struct list_head rac_list; /* stash on peer's conn list */ - struct list_head rac_hashlist; /* stash in connection hash table */ - struct list_head rac_schedlist; /* schedule (on rad_???_conns) for attention */ - struct list_head rac_fmaq; /* txs queued for FMA */ - struct list_head rac_rdmaq; /* txs awaiting RDMA completion */ - struct list_head rac_replyq; /* txs awaiting replies */ - __u64 rac_peerstamp; /* peer's unique stamp */ - __u64 rac_peer_connstamp; /* peer's unique connection stamp */ - __u64 rac_my_connstamp; /* my unique connection stamp */ - unsigned long rac_last_tx; /* when I last sent an FMA message (jiffies) */ - unsigned long rac_last_rx; /* when I last received an FMA messages (jiffies) */ - long rac_keepalive; /* keepalive interval (seconds) */ - long rac_timeout; /* infer peer death if no rx for this many seconds */ - __u32 rac_cqid; /* my completion callback id (non-unique) */ - __u32 rac_tx_seq; /* tx msg sequence number */ - __u32 rac_rx_seq; /* rx msg sequence number */ - atomic_t rac_refcount; /* # users */ - unsigned int rac_close_sent; /* I've sent CLOSE */ - unsigned int rac_close_recvd; /* I've received CLOSE */ - unsigned int rac_state; /* connection state */ - unsigned int rac_scheduled; /* being attented to */ - spinlock_t rac_lock; /* serialise */ - kra_device_t *rac_device; /* which device */ - RAP_PVOID rac_rihandle; /* RA endpoint */ - kra_msg_t *rac_rxmsg; /* incoming message (FMA prefix) */ - kra_msg_t rac_msg; /* keepalive/CLOSE message buffer */ -} kra_conn_t; - -#define RANAL_CONN_ESTABLISHED 0 -#define RANAL_CONN_CLOSING 1 -#define RANAL_CONN_CLOSED 2 - -typedef struct kra_peer -{ - struct list_head rap_list; /* stash on global peer list */ - struct list_head rap_connd_list; /* schedule on kra_connd_peers */ - struct 
list_head rap_conns; /* all active connections */ - struct list_head rap_tx_queue; /* msgs waiting for a conn */ - lnet_nid_t rap_nid; /* who's on the other end(s) */ - __u32 rap_ip; /* IP address of peer */ - int rap_port; /* port on which peer listens */ - atomic_t rap_refcount; /* # users */ - int rap_persistence; /* "known" peer refs */ - int rap_connecting; /* connection forming */ - unsigned long rap_reconnect_time; /* CURRENT_SECONDS when reconnect OK */ - unsigned long rap_reconnect_interval; /* exponential backoff */ -} kra_peer_t; - -extern kra_data_t kranal_data; -extern kra_tunables_t kranal_tunables; - -extern void kranal_destroy_peer(kra_peer_t *peer); -extern void kranal_destroy_conn(kra_conn_t *conn); - -static inline void -kranal_peer_addref(kra_peer_t *peer) -{ - CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid)); - LASSERT(atomic_read(&peer->rap_refcount) > 0); - atomic_inc(&peer->rap_refcount); -} - -static inline void -kranal_peer_decref(kra_peer_t *peer) -{ - CDEBUG(D_NET, "%p->%s\n", peer, libcfs_nid2str(peer->rap_nid)); - LASSERT(atomic_read(&peer->rap_refcount) > 0); - if (atomic_dec_and_test(&peer->rap_refcount)) - kranal_destroy_peer(peer); -} - -static inline struct list_head * -kranal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kranal_data.kra_peer_hash_size; - - return (&kranal_data.kra_peers[hash]); -} - -static inline int -kranal_peer_active(kra_peer_t *peer) -{ - /* Am I in the peer hash table? 
*/ - return (!list_empty(&peer->rap_list)); -} - -static inline void -kranal_conn_addref(kra_conn_t *conn) -{ - CDEBUG(D_NET, "%p->%s\n", conn, - libcfs_nid2str(conn->rac_peer->rap_nid)); - LASSERT(atomic_read(&conn->rac_refcount) > 0); - atomic_inc(&conn->rac_refcount); -} - -static inline void -kranal_conn_decref(kra_conn_t *conn) -{ - CDEBUG(D_NET, "%p->%s\n", conn, - libcfs_nid2str(conn->rac_peer->rap_nid)); - LASSERT(atomic_read(&conn->rac_refcount) > 0); - if (atomic_dec_and_test(&conn->rac_refcount)) - kranal_destroy_conn(conn); -} - -static inline struct list_head * -kranal_cqid2connlist (__u32 cqid) -{ - unsigned int hash = cqid % kranal_data.kra_conn_hash_size; - - return (&kranal_data.kra_conns [hash]); -} - -static inline kra_conn_t * -kranal_cqid2conn_locked (__u32 cqid) -{ - struct list_head *conns = kranal_cqid2connlist(cqid); - struct list_head *tmp; - kra_conn_t *conn; - - list_for_each(tmp, conns) { - conn = list_entry(tmp, kra_conn_t, rac_hashlist); - - if (conn->rac_cqid == cqid) - return conn; - } - - return NULL; -} - -static inline int -kranal_tx_mapped (kra_tx_t *tx) -{ - return (tx->tx_buftype == RANAL_BUF_VIRT_MAPPED || - tx->tx_buftype == RANAL_BUF_PHYS_MAPPED); -} - -int kranal_startup (lnet_ni_t *ni); -void kranal_shutdown (lnet_ni_t *ni); -int kranal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kranal_eager_recv(lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kranal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int kranal_accept(lnet_ni_t *ni, struct socket *sock); - -extern void kranal_free_acceptsock (kra_acceptsock_t *ras); -extern int kranal_listener_procint (ctl_table *table, - int write, struct file *filp, - void *buffer, size_t *lenp); -extern void kranal_update_reaper_timeout 
(long timeout); -extern void kranal_tx_done (kra_tx_t *tx, int completion); -extern void kranal_unlink_peer_locked (kra_peer_t *peer); -extern void kranal_schedule_conn (kra_conn_t *conn); -extern int kranal_create_peer (kra_peer_t **peerp, lnet_nid_t nid); -extern int kranal_add_persistent_peer (lnet_nid_t nid, __u32 ip, int port); -extern kra_peer_t *kranal_find_peer_locked (lnet_nid_t nid); -extern void kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx); -extern int kranal_del_peer (lnet_nid_t nid); -extern void kranal_device_callback (RAP_INT32 devid, RAP_PVOID arg); -extern int kranal_thread_start (int(*fn)(void *arg), void *arg); -extern int kranal_connd (void *arg); -extern int kranal_reaper (void *arg); -extern int kranal_scheduler (void *arg); -extern void kranal_close_conn_locked (kra_conn_t *conn, int error); -extern void kranal_close_conn (kra_conn_t *conn, int error); -extern void kranal_terminate_conn_locked (kra_conn_t *conn); -extern void kranal_connect (kra_peer_t *peer); -extern int kranal_conn_handshake (struct socket *sock, kra_peer_t *peer); -extern int kranal_tunables_init(void); -extern void kranal_tunables_fini(void); -extern void kranal_init_msg(kra_msg_t *msg, int type); diff --git a/lnet/klnds/ralnd/ralnd_cb.c b/lnet/klnds/ralnd/ralnd_cb.c deleted file mode 100644 index 969efd269fef201623b96527cd7a418d108048a4..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/ralnd_cb.c +++ /dev/null @@ -1,2036 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "ralnd.h" - -void -kranal_device_callback(RAP_INT32 devid, RAP_PVOID arg) -{ - kra_device_t *dev; - int i; - unsigned long flags; - - CDEBUG(D_NET, "callback for device %d\n", devid); - - for (i = 0; i < kranal_data.kra_ndevs; i++) { - - dev = &kranal_data.kra_devices[i]; - if (dev->rad_id != devid) - continue; - - spin_lock_irqsave(&dev->rad_lock, flags); - - if (!dev->rad_ready) { - dev->rad_ready = 1; - wake_up(&dev->rad_waitq); - } - - spin_unlock_irqrestore(&dev->rad_lock, flags); - return; - } - - CWARN("callback for unknown device %d\n", devid); -} - -void -kranal_schedule_conn(kra_conn_t *conn) -{ - kra_device_t *dev = conn->rac_device; - unsigned long flags; - - spin_lock_irqsave(&dev->rad_lock, flags); - - if (!conn->rac_scheduled) { - kranal_conn_addref(conn); /* +1 ref for scheduler */ - conn->rac_scheduled = 1; - list_add_tail(&conn->rac_schedlist, &dev->rad_ready_conns); - wake_up(&dev->rad_waitq); - } - - spin_unlock_irqrestore(&dev->rad_lock, flags); -} - -kra_tx_t * -kranal_get_idle_tx (void) -{ - unsigned long flags; - kra_tx_t *tx; - - spin_lock_irqsave(&kranal_data.kra_tx_lock, flags); - - if (list_empty(&kranal_data.kra_idle_txs)) { - spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags); - return NULL; - } - - tx = list_entry(kranal_data.kra_idle_txs.next, kra_tx_t, tx_list); - list_del(&tx->tx_list); - - /* Allocate a new completion cookie. It might not be needed, but we've - * got a lock right now... 
*/ - tx->tx_cookie = kranal_data.kra_next_tx_cookie++; - - spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags); - - LASSERT (tx->tx_buftype == RANAL_BUF_NONE); - LASSERT (tx->tx_msg.ram_type == RANAL_MSG_NONE); - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); - - return tx; -} - -void -kranal_init_msg(kra_msg_t *msg, int type) -{ - msg->ram_magic = RANAL_MSG_MAGIC; - msg->ram_version = RANAL_MSG_VERSION; - msg->ram_type = type; - msg->ram_srcnid = kranal_data.kra_ni->ni_nid; - /* ram_connstamp gets set when FMA is sent */ -} - -kra_tx_t * -kranal_new_tx_msg (int type) -{ - kra_tx_t *tx = kranal_get_idle_tx(); - - if (tx != NULL) - kranal_init_msg(&tx->tx_msg, type); - - return tx; -} - -int -kranal_setup_immediate_buffer (kra_tx_t *tx, - unsigned int niov, struct iovec *iov, - int offset, int nob) - -{ - /* For now this is almost identical to kranal_setup_virt_buffer, but we - * could "flatten" the payload into a single contiguous buffer ready - * for sending direct over an FMA if we ever needed to. 
*/ - - LASSERT (tx->tx_buftype == RANAL_BUF_NONE); - LASSERT (nob >= 0); - - if (nob == 0) { - tx->tx_buffer = NULL; - } else { - LASSERT (niov > 0); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR("Can't handle multiple vaddr fragments\n"); - return -EMSGSIZE; - } - - tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset); - } - - tx->tx_buftype = RANAL_BUF_IMMEDIATE; - tx->tx_nob = nob; - return 0; -} - -int -kranal_setup_virt_buffer (kra_tx_t *tx, - unsigned int niov, struct iovec *iov, - int offset, int nob) - -{ - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT (tx->tx_buftype == RANAL_BUF_NONE); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR("Can't handle multiple vaddr fragments\n"); - return -EMSGSIZE; - } - - tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED; - tx->tx_nob = nob; - tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset); - return 0; -} - -int -kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, lnet_kiov_t *kiov, - int offset, int nob) -{ - RAP_PHYS_REGION *phys = tx->tx_phys; - int resid; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (tx->tx_buftype == RANAL_BUF_NONE); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED; - tx->tx_nob = nob; - tx->tx_buffer = (void *)((unsigned long)(kiov->kiov_offset + offset)); - - phys->Address = lnet_page2phys(kiov->kiov_page); - phys++; - - resid = nob - (kiov->kiov_len - offset); - while (resid > 0) { - kiov++; - nkiov--; - LASSERT (nkiov > 0); - - if (kiov->kiov_offset != 0 || - ((resid > PAGE_SIZE) && - kiov->kiov_len < PAGE_SIZE)) { - /* Can't have gaps */ - CERROR("Can't make payload 
contiguous in I/O VM:" - "page %d, offset %d, len %d \n", - (int)(phys - tx->tx_phys), - kiov->kiov_offset, kiov->kiov_len); - return -EINVAL; - } - - if ((phys - tx->tx_phys) == LNET_MAX_IOV) { - CERROR ("payload too big (%d)\n", (int)(phys - tx->tx_phys)); - return -EMSGSIZE; - } - - phys->Address = lnet_page2phys(kiov->kiov_page); - phys++; - - resid -= PAGE_SIZE; - } - - tx->tx_phys_npages = phys - tx->tx_phys; - return 0; -} - -static inline int -kranal_setup_rdma_buffer (kra_tx_t *tx, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - int offset, int nob) -{ - LASSERT ((iov == NULL) != (kiov == NULL)); - - if (kiov != NULL) - return kranal_setup_phys_buffer(tx, niov, kiov, offset, nob); - - return kranal_setup_virt_buffer(tx, niov, iov, offset, nob); -} - -int -kranal_map_buffer (kra_tx_t *tx) -{ - kra_conn_t *conn = tx->tx_conn; - kra_device_t *dev = conn->rac_device; - RAP_RETURN rrc; - - LASSERT (current == dev->rad_scheduler); - - switch (tx->tx_buftype) { - default: - LBUG(); - - case RANAL_BUF_NONE: - case RANAL_BUF_IMMEDIATE: - case RANAL_BUF_PHYS_MAPPED: - case RANAL_BUF_VIRT_MAPPED: - return 0; - - case RANAL_BUF_PHYS_UNMAPPED: - rrc = RapkRegisterPhys(dev->rad_handle, - tx->tx_phys, tx->tx_phys_npages, - &tx->tx_map_key); - if (rrc != RAP_SUCCESS) { - CERROR ("Can't map %d pages: dev %d " - "phys %u pp %u, virt %u nob %lu\n", - tx->tx_phys_npages, dev->rad_id, - dev->rad_nphysmap, dev->rad_nppphysmap, - dev->rad_nvirtmap, dev->rad_nobvirtmap); - return -ENOMEM; /* assume insufficient resources */ - } - - dev->rad_nphysmap++; - dev->rad_nppphysmap += tx->tx_phys_npages; - - tx->tx_buftype = RANAL_BUF_PHYS_MAPPED; - return 0; - - case RANAL_BUF_VIRT_UNMAPPED: - rrc = RapkRegisterMemory(dev->rad_handle, - tx->tx_buffer, tx->tx_nob, - &tx->tx_map_key); - if (rrc != RAP_SUCCESS) { - CERROR ("Can't map %d bytes: dev %d " - "phys %u pp %u, virt %u nob %lu\n", - tx->tx_nob, dev->rad_id, - dev->rad_nphysmap, dev->rad_nppphysmap, - 
dev->rad_nvirtmap, dev->rad_nobvirtmap); - return -ENOMEM; /* assume insufficient resources */ - } - - dev->rad_nvirtmap++; - dev->rad_nobvirtmap += tx->tx_nob; - - tx->tx_buftype = RANAL_BUF_VIRT_MAPPED; - return 0; - } -} - -void -kranal_unmap_buffer (kra_tx_t *tx) -{ - kra_device_t *dev; - RAP_RETURN rrc; - - switch (tx->tx_buftype) { - default: - LBUG(); - - case RANAL_BUF_NONE: - case RANAL_BUF_IMMEDIATE: - case RANAL_BUF_PHYS_UNMAPPED: - case RANAL_BUF_VIRT_UNMAPPED: - break; - - case RANAL_BUF_PHYS_MAPPED: - LASSERT (tx->tx_conn != NULL); - dev = tx->tx_conn->rac_device; - LASSERT (current == dev->rad_scheduler); - rrc = RapkDeregisterMemory(dev->rad_handle, NULL, - &tx->tx_map_key); - LASSERT (rrc == RAP_SUCCESS); - - dev->rad_nphysmap--; - dev->rad_nppphysmap -= tx->tx_phys_npages; - - tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED; - break; - - case RANAL_BUF_VIRT_MAPPED: - LASSERT (tx->tx_conn != NULL); - dev = tx->tx_conn->rac_device; - LASSERT (current == dev->rad_scheduler); - rrc = RapkDeregisterMemory(dev->rad_handle, tx->tx_buffer, - &tx->tx_map_key); - LASSERT (rrc == RAP_SUCCESS); - - dev->rad_nvirtmap--; - dev->rad_nobvirtmap -= tx->tx_nob; - - tx->tx_buftype = RANAL_BUF_VIRT_UNMAPPED; - break; - } -} - -void -kranal_tx_done (kra_tx_t *tx, int completion) -{ - lnet_msg_t *lnetmsg[2]; - unsigned long flags; - int i; - - LASSERT (!in_interrupt()); - - kranal_unmap_buffer(tx); - - lnetmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lnetmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - - tx->tx_buftype = RANAL_BUF_NONE; - tx->tx_msg.ram_type = RANAL_MSG_NONE; - tx->tx_conn = NULL; - - spin_lock_irqsave(&kranal_data.kra_tx_lock, flags); - - list_add_tail(&tx->tx_list, &kranal_data.kra_idle_txs); - - spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags); - - /* finalize AFTER freeing lnet msgs */ - for (i = 0; i < 2; i++) { - if (lnetmsg[i] == NULL) - continue; - - lnet_finalize(kranal_data.kra_ni, lnetmsg[i], completion); - } -} - -kra_conn_t * 
-kranal_find_conn_locked (kra_peer_t *peer) -{ - struct list_head *tmp; - - /* just return the first connection */ - list_for_each (tmp, &peer->rap_conns) { - return list_entry(tmp, kra_conn_t, rac_list); - } - - return NULL; -} - -void -kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx) -{ - unsigned long flags; - - tx->tx_conn = conn; - - spin_lock_irqsave(&conn->rac_lock, flags); - list_add_tail(&tx->tx_list, &conn->rac_fmaq); - tx->tx_qtime = jiffies; - spin_unlock_irqrestore(&conn->rac_lock, flags); - - kranal_schedule_conn(conn); -} - -void -kranal_launch_tx (kra_tx_t *tx, lnet_nid_t nid) -{ - unsigned long flags; - kra_peer_t *peer; - kra_conn_t *conn; - int rc; - int retry; - rwlock_t *g_lock = &kranal_data.kra_global_lock; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - - for (retry = 0; ; retry = 1) { - - read_lock(g_lock); - - peer = kranal_find_peer_locked(nid); - if (peer != NULL) { - conn = kranal_find_conn_locked(peer); - if (conn != NULL) { - kranal_post_fma(conn, tx); - read_unlock(g_lock); - return; - } - } - - /* Making connections; I'll need a write lock... 
*/ - read_unlock(g_lock); - write_lock_irqsave(g_lock, flags); - - peer = kranal_find_peer_locked(nid); - if (peer != NULL) - break; - - write_unlock_irqrestore(g_lock, flags); - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); - kranal_tx_done(tx, -EHOSTUNREACH); - return; - } - - rc = kranal_add_persistent_peer(nid, LNET_NIDADDR(nid), - lnet_acceptor_port()); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); - kranal_tx_done(tx, rc); - return; - } - } - - conn = kranal_find_conn_locked(peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - kranal_post_fma(conn, tx); - write_unlock_irqrestore(g_lock, flags); - return; - } - - LASSERT (peer->rap_persistence > 0); - - if (!peer->rap_connecting) { - LASSERT (list_empty(&peer->rap_tx_queue)); - - if (!(peer->rap_reconnect_interval == 0 || /* first attempt */ - time_after_eq(jiffies, peer->rap_reconnect_time))) { - write_unlock_irqrestore(g_lock, flags); - kranal_tx_done(tx, -EHOSTUNREACH); - return; - } - - peer->rap_connecting = 1; - kranal_peer_addref(peer); /* extra ref for connd */ - - spin_lock(&kranal_data.kra_connd_lock); - - list_add_tail(&peer->rap_connd_list, - &kranal_data.kra_connd_peers); - wake_up(&kranal_data.kra_connd_waitq); - - spin_unlock(&kranal_data.kra_connd_lock); - } - - /* A connection is being established; queue the message... */ - list_add_tail(&tx->tx_list, &peer->rap_tx_queue); - - write_unlock_irqrestore(g_lock, flags); -} - -void -kranal_rdma(kra_tx_t *tx, int type, - kra_rdma_desc_t *sink, int nob, __u64 cookie) -{ - kra_conn_t *conn = tx->tx_conn; - RAP_RETURN rrc; - unsigned long flags; - - LASSERT (kranal_tx_mapped(tx)); - LASSERT (nob <= sink->rard_nob); - LASSERT (nob <= tx->tx_nob); - - /* No actual race with scheduler sending CLOSE (I'm she!) 
*/ - LASSERT (current == conn->rac_device->rad_scheduler); - - memset(&tx->tx_rdma_desc, 0, sizeof(tx->tx_rdma_desc)); - tx->tx_rdma_desc.SrcPtr.AddressBits = (__u64)((unsigned long)tx->tx_buffer); - tx->tx_rdma_desc.SrcKey = tx->tx_map_key; - tx->tx_rdma_desc.DstPtr = sink->rard_addr; - tx->tx_rdma_desc.DstKey = sink->rard_key; - tx->tx_rdma_desc.Length = nob; - tx->tx_rdma_desc.AppPtr = tx; - - /* prep final completion message */ - kranal_init_msg(&tx->tx_msg, type); - tx->tx_msg.ram_u.completion.racm_cookie = cookie; - - if (nob == 0) { /* Immediate completion */ - kranal_post_fma(conn, tx); - return; - } - - LASSERT (!conn->rac_close_sent); /* Don't lie (CLOSE == RDMA idle) */ - - rrc = RapkPostRdma(conn->rac_rihandle, &tx->tx_rdma_desc); - LASSERT (rrc == RAP_SUCCESS); - - spin_lock_irqsave(&conn->rac_lock, flags); - list_add_tail(&tx->tx_list, &conn->rac_rdmaq); - tx->tx_qtime = jiffies; - spin_unlock_irqrestore(&conn->rac_lock, flags); -} - -int -kranal_consume_rxmsg (kra_conn_t *conn, void *buffer, int nob) -{ - __u32 nob_received = nob; - RAP_RETURN rrc; - - LASSERT (conn->rac_rxmsg != NULL); - CDEBUG(D_NET, "Consuming %p\n", conn); - - rrc = RapkFmaCopyOut(conn->rac_rihandle, buffer, - &nob_received, sizeof(kra_msg_t)); - LASSERT (rrc == RAP_SUCCESS); - - conn->rac_rxmsg = NULL; - - if (nob_received < nob) { - CWARN("Incomplete immediate msg from %s: expected %d, got %d\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - nob, nob_received); - return -EPROTO; - } - - return 0; -} - -int -kranal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - 
kra_tx_t *tx; - int rc; - - /* NB 'private' is different depending on what we're sending.... */ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - nob, niov, libcfs_id2str(target)); - - LASSERT (nob == 0 || niov > 0); - LASSERT (niov <= LNET_MAX_IOV); - - LASSERT (!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(kiov != NULL && iov != NULL)); - - if (routing) { - CERROR ("Can't route\n"); - return -EIO; - } - - switch(type) { - default: - LBUG(); - - case LNET_MSG_ACK: - LASSERT (nob == 0); - break; - - case LNET_MSG_GET: - LASSERT (niov == 0); - LASSERT (nob == 0); - /* We have to consider the eventual sink buffer rather than any - * payload passed here (there isn't any, and strictly, looking - * inside lntmsg is a layering violation). We send a simple - * IMMEDIATE GET if the sink buffer is mapped already and small - * enough for FMA */ - - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0 && - lntmsg->msg_md->md_length <= RANAL_FMA_MAX_DATA && - lntmsg->msg_md->md_length <= *kranal_tunables.kra_max_immediate) - break; /* send IMMEDIATE */ - - tx = kranal_new_tx_msg(RANAL_MSG_GET_REQ); - if (tx == NULL) - return -ENOMEM; - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - rc = kranal_setup_virt_buffer(tx, lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kranal_setup_phys_buffer(tx, lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc != 0) { - kranal_tx_done(tx, rc); - return -EIO; - } - - tx->tx_lntmsg[1] = lnet_create_reply_msg(ni, lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR("Can't create reply for GET to %s\n", - libcfs_nid2str(target.nid)); - kranal_tx_done(tx, rc); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; - tx->tx_msg.ram_u.get.ragm_hdr = *hdr; - /* rest of tx_msg is setup just before it is sent */ - kranal_launch_tx(tx, 
target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - if (kiov == NULL && /* not paged */ - nob <= RANAL_FMA_MAX_DATA && /* small enough */ - nob <= *kranal_tunables.kra_max_immediate) - break; /* send IMMEDIATE */ - - tx = kranal_new_tx_msg(RANAL_MSG_PUT_REQ); - if (tx == NULL) - return -ENOMEM; - - rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob); - if (rc != 0) { - kranal_tx_done(tx, rc); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; - tx->tx_msg.ram_u.putreq.raprm_hdr = *hdr; - /* rest of tx_msg is setup just before it is sent */ - kranal_launch_tx(tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT (kiov == NULL); - LASSERT (nob <= RANAL_FMA_MAX_DATA); - - tx = kranal_new_tx_msg(RANAL_MSG_IMMEDIATE); - if (tx == NULL) - return -ENOMEM; - - rc = kranal_setup_immediate_buffer(tx, niov, iov, offset, nob); - if (rc != 0) { - kranal_tx_done(tx, rc); - return -EIO; - } - - tx->tx_msg.ram_u.immediate.raim_hdr = *hdr; - tx->tx_lntmsg[0] = lntmsg; - kranal_launch_tx(tx, target.nid); - return 0; -} - -void -kranal_reply(lnet_ni_t *ni, kra_conn_t *conn, lnet_msg_t *lntmsg) -{ - kra_msg_t *rxmsg = conn->rac_rxmsg; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - kra_tx_t *tx; - int rc; - - tx = kranal_get_idle_tx(); - if (tx == NULL) - goto failed_0; - - rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, nob); - if (rc != 0) - goto failed_1; - - tx->tx_conn = conn; - - rc = kranal_map_buffer(tx); - if (rc != 0) - goto failed_1; - - tx->tx_lntmsg[0] = lntmsg; - - kranal_rdma(tx, RANAL_MSG_GET_DONE, - &rxmsg->ram_u.get.ragm_desc, nob, - rxmsg->ram_u.get.ragm_cookie); - return; - - failed_1: - kranal_tx_done(tx, -EIO); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kranal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - void 
**new_private) -{ - kra_conn_t *conn = (kra_conn_t *)private; - - LCONSOLE_ERROR("Dropping message from %s: no buffers free.\n", - libcfs_nid2str(conn->rac_peer->rap_nid)); - - return -EDEADLK; -} - -int -kranal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kra_conn_t *conn = private; - kra_msg_t *rxmsg = conn->rac_rxmsg; - kra_tx_t *tx; - void *buffer; - int rc; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - CDEBUG(D_NET, "conn %p, rxmsg %p, lntmsg %p\n", conn, rxmsg, lntmsg); - - switch(rxmsg->ram_type) { - default: - LBUG(); - - case RANAL_MSG_IMMEDIATE: - if (mlen == 0) { - buffer = NULL; - } else if (kiov != NULL) { - CERROR("Can't recv immediate into paged buffer\n"); - return -EIO; - } else { - LASSERT (niov > 0); - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - if (mlen > iov->iov_len - offset) { - CERROR("Can't handle immediate frags\n"); - return -EIO; - } - buffer = ((char *)iov->iov_base) + offset; - } - rc = kranal_consume_rxmsg(conn, buffer, mlen); - lnet_finalize(ni, lntmsg, (rc == 0) ? 
0 : -EIO); - return 0; - - case RANAL_MSG_PUT_REQ: - tx = kranal_new_tx_msg(RANAL_MSG_PUT_ACK); - if (tx == NULL) { - kranal_consume_rxmsg(conn, NULL, 0); - return -ENOMEM; - } - - rc = kranal_setup_rdma_buffer(tx, niov, iov, kiov, offset, mlen); - if (rc != 0) { - kranal_tx_done(tx, rc); - kranal_consume_rxmsg(conn, NULL, 0); - return -EIO; - } - - tx->tx_conn = conn; - rc = kranal_map_buffer(tx); - if (rc != 0) { - kranal_tx_done(tx, rc); - kranal_consume_rxmsg(conn, NULL, 0); - return -EIO; - } - - tx->tx_msg.ram_u.putack.rapam_src_cookie = - conn->rac_rxmsg->ram_u.putreq.raprm_cookie; - tx->tx_msg.ram_u.putack.rapam_dst_cookie = tx->tx_cookie; - tx->tx_msg.ram_u.putack.rapam_desc.rard_key = tx->tx_map_key; - tx->tx_msg.ram_u.putack.rapam_desc.rard_addr.AddressBits = - (__u64)((unsigned long)tx->tx_buffer); - tx->tx_msg.ram_u.putack.rapam_desc.rard_nob = mlen; - - tx->tx_lntmsg[0] = lntmsg; /* finalize this on RDMA_DONE */ - - kranal_post_fma(conn, tx); - kranal_consume_rxmsg(conn, NULL, 0); - return 0; - - case RANAL_MSG_GET_REQ: - if (lntmsg != NULL) { - /* Matched! 
*/ - kranal_reply(ni, conn, lntmsg); - } else { - /* No match */ - tx = kranal_new_tx_msg(RANAL_MSG_GET_NAK); - if (tx != NULL) { - tx->tx_msg.ram_u.completion.racm_cookie = - rxmsg->ram_u.get.ragm_cookie; - kranal_post_fma(conn, tx); - } - } - kranal_consume_rxmsg(conn, NULL, 0); - return 0; - } -} - -int -kranal_thread_start (int(*fn)(void *arg), void *arg) -{ - long pid = kernel_thread(fn, arg, 0); - - if (pid < 0) - return(int)pid; - - atomic_inc(&kranal_data.kra_nthreads); - return 0; -} - -void -kranal_thread_fini (void) -{ - atomic_dec(&kranal_data.kra_nthreads); -} - -int -kranal_check_conn_timeouts (kra_conn_t *conn) -{ - kra_tx_t *tx; - struct list_head *ttmp; - unsigned long flags; - long timeout; - unsigned long now = jiffies; - - LASSERT (conn->rac_state == RANAL_CONN_ESTABLISHED || - conn->rac_state == RANAL_CONN_CLOSING); - - if (!conn->rac_close_sent && - time_after_eq(now, conn->rac_last_tx + conn->rac_keepalive * HZ)) { - /* not sent in a while; schedule conn so scheduler sends a keepalive */ - CDEBUG(D_NET, "Scheduling keepalive %p->%s\n", - conn, libcfs_nid2str(conn->rac_peer->rap_nid)); - kranal_schedule_conn(conn); - } - - timeout = conn->rac_timeout * HZ; - - if (!conn->rac_close_recvd && - time_after_eq(now, conn->rac_last_rx + timeout)) { - CERROR("%s received from %s within %lu seconds\n", - (conn->rac_state == RANAL_CONN_ESTABLISHED) ? - "Nothing" : "CLOSE not", - libcfs_nid2str(conn->rac_peer->rap_nid), - (now - conn->rac_last_rx)/HZ); - return -ETIMEDOUT; - } - - if (conn->rac_state != RANAL_CONN_ESTABLISHED) - return 0; - - /* Check the conn's queues are moving. These are "belt+braces" checks, - * in case of hardware/software errors that make this conn seem - * responsive even though it isn't progressing its message queues. 
*/ - - spin_lock_irqsave(&conn->rac_lock, flags); - - list_for_each (ttmp, &conn->rac_fmaq) { - tx = list_entry(ttmp, kra_tx_t, tx_list); - - if (time_after_eq(now, tx->tx_qtime + timeout)) { - spin_unlock_irqrestore(&conn->rac_lock, flags); - CERROR("tx on fmaq for %s blocked %lu seconds\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - (now - tx->tx_qtime)/HZ); - return -ETIMEDOUT; - } - } - - list_for_each (ttmp, &conn->rac_rdmaq) { - tx = list_entry(ttmp, kra_tx_t, tx_list); - - if (time_after_eq(now, tx->tx_qtime + timeout)) { - spin_unlock_irqrestore(&conn->rac_lock, flags); - CERROR("tx on rdmaq for %s blocked %lu seconds\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - (now - tx->tx_qtime)/HZ); - return -ETIMEDOUT; - } - } - - list_for_each (ttmp, &conn->rac_replyq) { - tx = list_entry(ttmp, kra_tx_t, tx_list); - - if (time_after_eq(now, tx->tx_qtime + timeout)) { - spin_unlock_irqrestore(&conn->rac_lock, flags); - CERROR("tx on replyq for %s blocked %lu seconds\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - (now - tx->tx_qtime)/HZ); - return -ETIMEDOUT; - } - } - - spin_unlock_irqrestore(&conn->rac_lock, flags); - return 0; -} - -void -kranal_reaper_check (int idx, unsigned long *min_timeoutp) -{ - struct list_head *conns = &kranal_data.kra_conns[idx]; - struct list_head *ctmp; - kra_conn_t *conn; - unsigned long flags; - int rc; - - again: - /* NB. We expect to check all the conns and not find any problems, so - * we just use a shared lock while we take a look... 
*/ - read_lock(&kranal_data.kra_global_lock); - - list_for_each (ctmp, conns) { - conn = list_entry(ctmp, kra_conn_t, rac_hashlist); - - if (conn->rac_timeout < *min_timeoutp ) - *min_timeoutp = conn->rac_timeout; - if (conn->rac_keepalive < *min_timeoutp ) - *min_timeoutp = conn->rac_keepalive; - - rc = kranal_check_conn_timeouts(conn); - if (rc == 0) - continue; - - kranal_conn_addref(conn); - read_unlock(&kranal_data.kra_global_lock); - - CERROR("Conn to %s, cqid %d timed out\n", - libcfs_nid2str(conn->rac_peer->rap_nid), - conn->rac_cqid); - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - switch (conn->rac_state) { - default: - LBUG(); - - case RANAL_CONN_ESTABLISHED: - kranal_close_conn_locked(conn, -ETIMEDOUT); - break; - - case RANAL_CONN_CLOSING: - kranal_terminate_conn_locked(conn); - break; - } - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - - kranal_conn_decref(conn); - - /* start again now I've dropped the lock */ - goto again; - } - - read_unlock(&kranal_data.kra_global_lock); -} - -int -kranal_connd (void *arg) -{ - long id = (long)arg; - char name[16]; - wait_queue_t wait; - unsigned long flags; - kra_peer_t *peer; - kra_acceptsock_t *ras; - int did_something; - - snprintf(name, sizeof(name), "kranal_connd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - - while (!kranal_data.kra_shutdown) { - did_something = 0; - - if (!list_empty(&kranal_data.kra_connd_acceptq)) { - ras = list_entry(kranal_data.kra_connd_acceptq.next, - kra_acceptsock_t, ras_list); - list_del(&ras->ras_list); - - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - CDEBUG(D_NET,"About to handshake someone\n"); - - kranal_conn_handshake(ras->ras_sock, NULL); - kranal_free_acceptsock(ras); - - CDEBUG(D_NET,"Finished handshaking someone\n"); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - did_something = 1; - } - - 
if (!list_empty(&kranal_data.kra_connd_peers)) { - peer = list_entry(kranal_data.kra_connd_peers.next, - kra_peer_t, rap_connd_list); - - list_del_init(&peer->rap_connd_list); - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - kranal_connect(peer); - kranal_peer_decref(peer); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - did_something = 1; - } - - if (did_something) - continue; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kranal_data.kra_connd_waitq, &wait); - - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - schedule (); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&kranal_data.kra_connd_waitq, &wait); - - spin_lock_irqsave(&kranal_data.kra_connd_lock, flags); - } - - spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags); - - kranal_thread_fini(); - return 0; -} - -void -kranal_update_reaper_timeout(long timeout) -{ - unsigned long flags; - - LASSERT (timeout > 0); - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - - if (timeout < kranal_data.kra_new_min_timeout) - kranal_data.kra_new_min_timeout = timeout; - - spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags); -} - -int -kranal_reaper (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - long timeout; - int i; - int conn_entries = kranal_data.kra_conn_hash_size; - int conn_index = 0; - int base_index = conn_entries - 1; - unsigned long next_check_time = jiffies; - long next_min_timeout = MAX_SCHEDULE_TIMEOUT; - long current_min_timeout = 1; - - cfs_daemonize("kranal_reaper"); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - - while (!kranal_data.kra_shutdown) { - /* I wake up every 'p' seconds to check for timeouts on some - * more peers. 
I try to check every connection 'n' times - * within the global minimum of all keepalive and timeout - * intervals, to ensure I attend to every connection within - * (n+1)/n times its timeout intervals. */ - const int p = 1; - const int n = 3; - unsigned long min_timeout; - int chunk; - - /* careful with the jiffy wrap... */ - timeout = (long)(next_check_time - jiffies); - if (timeout > 0) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&kranal_data.kra_reaper_waitq, &wait); - - spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags); - - schedule_timeout(timeout); - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&kranal_data.kra_reaper_waitq, &wait); - continue; - } - - if (kranal_data.kra_new_min_timeout != MAX_SCHEDULE_TIMEOUT) { - /* new min timeout set: restart min timeout scan */ - next_min_timeout = MAX_SCHEDULE_TIMEOUT; - base_index = conn_index - 1; - if (base_index < 0) - base_index = conn_entries - 1; - - if (kranal_data.kra_new_min_timeout < current_min_timeout) { - current_min_timeout = kranal_data.kra_new_min_timeout; - CDEBUG(D_NET, "Set new min timeout %ld\n", - current_min_timeout); - } - - kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT; - } - min_timeout = current_min_timeout; - - spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags); - - LASSERT (min_timeout > 0); - - /* Compute how many table entries to check now so I get round - * the whole table fast enough given that I do this at fixed - * intervals of 'p' seconds) */ - chunk = conn_entries; - if (min_timeout > n * p) - chunk = (chunk * n * p) / min_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kranal_reaper_check(conn_index, - &next_min_timeout); - conn_index = (conn_index + 1) % conn_entries; - } - - next_check_time += p * HZ; - - spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags); - - if (((conn_index - chunk <= base_index && - base_index < conn_index) || - 
(conn_index - conn_entries - chunk <= base_index && - base_index < conn_index - conn_entries))) { - - /* Scanned all conns: set current_min_timeout... */ - if (current_min_timeout != next_min_timeout) { - current_min_timeout = next_min_timeout; - CDEBUG(D_NET, "Set new min timeout %ld\n", - current_min_timeout); - } - - /* ...and restart min timeout scan */ - next_min_timeout = MAX_SCHEDULE_TIMEOUT; - base_index = conn_index - 1; - if (base_index < 0) - base_index = conn_entries - 1; - } - } - - kranal_thread_fini(); - return 0; -} - -void -kranal_check_rdma_cq (kra_device_t *dev) -{ - kra_conn_t *conn; - kra_tx_t *tx; - RAP_RETURN rrc; - unsigned long flags; - RAP_RDMA_DESCRIPTOR *desc; - __u32 cqid; - __u32 event_type; - - for (;;) { - rrc = RapkCQDone(dev->rad_rdma_cqh, &cqid, &event_type); - if (rrc == RAP_NOT_DONE) { - CDEBUG(D_NET, "RDMA CQ %d empty\n", dev->rad_id); - return; - } - - LASSERT (rrc == RAP_SUCCESS); - LASSERT ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0); - - read_lock(&kranal_data.kra_global_lock); - - conn = kranal_cqid2conn_locked(cqid); - if (conn == NULL) { - /* Conn was destroyed? 
*/ - CDEBUG(D_NET, "RDMA CQID lookup %d failed\n", cqid); - read_unlock(&kranal_data.kra_global_lock); - continue; - } - - rrc = RapkRdmaDone(conn->rac_rihandle, &desc); - LASSERT (rrc == RAP_SUCCESS); - - CDEBUG(D_NET, "Completed %p\n", - list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list)); - - spin_lock_irqsave(&conn->rac_lock, flags); - - LASSERT (!list_empty(&conn->rac_rdmaq)); - tx = list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list); - list_del(&tx->tx_list); - - LASSERT(desc->AppPtr == (void *)tx); - LASSERT(tx->tx_msg.ram_type == RANAL_MSG_PUT_DONE || - tx->tx_msg.ram_type == RANAL_MSG_GET_DONE); - - list_add_tail(&tx->tx_list, &conn->rac_fmaq); - tx->tx_qtime = jiffies; - - spin_unlock_irqrestore(&conn->rac_lock, flags); - - /* Get conn's fmaq processed, now I've just put something - * there */ - kranal_schedule_conn(conn); - - read_unlock(&kranal_data.kra_global_lock); - } -} - -void -kranal_check_fma_cq (kra_device_t *dev) -{ - kra_conn_t *conn; - RAP_RETURN rrc; - __u32 cqid; - __u32 event_type; - struct list_head *conns; - struct list_head *tmp; - int i; - - for (;;) { - rrc = RapkCQDone(dev->rad_fma_cqh, &cqid, &event_type); - if (rrc == RAP_NOT_DONE) { - CDEBUG(D_NET, "FMA CQ %d empty\n", dev->rad_id); - return; - } - - LASSERT (rrc == RAP_SUCCESS); - - if ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0) { - - read_lock(&kranal_data.kra_global_lock); - - conn = kranal_cqid2conn_locked(cqid); - if (conn == NULL) { - CDEBUG(D_NET, "FMA CQID lookup %d failed\n", - cqid); - } else { - CDEBUG(D_NET, "FMA completed: %p CQID %d\n", - conn, cqid); - kranal_schedule_conn(conn); - } - - read_unlock(&kranal_data.kra_global_lock); - continue; - } - - /* FMA CQ has overflowed: check ALL conns */ - CWARN("FMA CQ overflow: scheduling ALL conns on device %d\n", - dev->rad_id); - - for (i = 0; i < kranal_data.kra_conn_hash_size; i++) { - - read_lock(&kranal_data.kra_global_lock); - - conns = &kranal_data.kra_conns[i]; - - list_for_each (tmp, conns) { - conn = 
list_entry(tmp, kra_conn_t, - rac_hashlist); - - if (conn->rac_device == dev) - kranal_schedule_conn(conn); - } - - /* don't block write lockers for too long... */ - read_unlock(&kranal_data.kra_global_lock); - } - } -} - -int -kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg, - void *immediate, int immediatenob) -{ - int sync = (msg->ram_type & RANAL_MSG_FENCE) != 0; - RAP_RETURN rrc; - - CDEBUG(D_NET,"%p sending msg %p %02x%s [%p for %d]\n", - conn, msg, msg->ram_type, sync ? "(sync)" : "", - immediate, immediatenob); - - LASSERT (sizeof(*msg) <= RANAL_FMA_MAX_PREFIX); - LASSERT ((msg->ram_type == RANAL_MSG_IMMEDIATE) ? - immediatenob <= RANAL_FMA_MAX_DATA : - immediatenob == 0); - - msg->ram_connstamp = conn->rac_my_connstamp; - msg->ram_seq = conn->rac_tx_seq; - - if (sync) - rrc = RapkFmaSyncSend(conn->rac_rihandle, - immediate, immediatenob, - msg, sizeof(*msg)); - else - rrc = RapkFmaSend(conn->rac_rihandle, - immediate, immediatenob, - msg, sizeof(*msg)); - - switch (rrc) { - default: - LBUG(); - - case RAP_SUCCESS: - conn->rac_last_tx = jiffies; - conn->rac_tx_seq++; - return 0; - - case RAP_NOT_DONE: - if (time_after_eq(jiffies, - conn->rac_last_tx + conn->rac_keepalive*HZ)) - CWARN("EAGAIN sending %02x (idle %lu secs)\n", - msg->ram_type, (jiffies - conn->rac_last_tx)/HZ); - return -EAGAIN; - } -} - -void -kranal_process_fmaq (kra_conn_t *conn) -{ - unsigned long flags; - int more_to_do; - kra_tx_t *tx; - int rc; - int expect_reply; - - /* NB 1. kranal_sendmsg() may fail if I'm out of credits right now. - * However I will be rescheduled by an FMA completion event - * when I eventually get some. - * NB 2. Sampling rac_state here races with setting it elsewhere. - * But it doesn't matter if I try to send a "real" message just - * as I start closing because I'll get scheduled to send the - * close anyway. */ - - /* Not racing with incoming message processing! 
*/ - LASSERT (current == conn->rac_device->rad_scheduler); - - if (conn->rac_state != RANAL_CONN_ESTABLISHED) { - if (!list_empty(&conn->rac_rdmaq)) { - /* RDMAs in progress */ - LASSERT (!conn->rac_close_sent); - - if (time_after_eq(jiffies, - conn->rac_last_tx + - conn->rac_keepalive * HZ)) { - CDEBUG(D_NET, "sending NOOP (rdma in progress)\n"); - kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP); - kranal_sendmsg(conn, &conn->rac_msg, NULL, 0); - } - return; - } - - if (conn->rac_close_sent) - return; - - CWARN("sending CLOSE to %s\n", - libcfs_nid2str(conn->rac_peer->rap_nid)); - kranal_init_msg(&conn->rac_msg, RANAL_MSG_CLOSE); - rc = kranal_sendmsg(conn, &conn->rac_msg, NULL, 0); - if (rc != 0) - return; - - conn->rac_close_sent = 1; - if (!conn->rac_close_recvd) - return; - - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (conn->rac_state == RANAL_CONN_CLOSING) - kranal_terminate_conn_locked(conn); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - return; - } - - spin_lock_irqsave(&conn->rac_lock, flags); - - if (list_empty(&conn->rac_fmaq)) { - - spin_unlock_irqrestore(&conn->rac_lock, flags); - - if (time_after_eq(jiffies, - conn->rac_last_tx + conn->rac_keepalive * HZ)) { - CDEBUG(D_NET, "sending NOOP -> %s (%p idle %lu(%ld))\n", - libcfs_nid2str(conn->rac_peer->rap_nid), conn, - (jiffies - conn->rac_last_tx)/HZ, conn->rac_keepalive); - kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP); - kranal_sendmsg(conn, &conn->rac_msg, NULL, 0); - } - return; - } - - tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list); - list_del(&tx->tx_list); - more_to_do = !list_empty(&conn->rac_fmaq); - - spin_unlock_irqrestore(&conn->rac_lock, flags); - - expect_reply = 0; - CDEBUG(D_NET, "sending regular msg: %p, type %02x, cookie "LPX64"\n", - tx, tx->tx_msg.ram_type, tx->tx_cookie); - switch (tx->tx_msg.ram_type) { - default: - LBUG(); - - case RANAL_MSG_IMMEDIATE: - rc = kranal_sendmsg(conn, &tx->tx_msg, - tx->tx_buffer, tx->tx_nob); - 
break; - - case RANAL_MSG_PUT_NAK: - case RANAL_MSG_PUT_DONE: - case RANAL_MSG_GET_NAK: - case RANAL_MSG_GET_DONE: - rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0); - break; - - case RANAL_MSG_PUT_REQ: - rc = kranal_map_buffer(tx); - LASSERT (rc != -EAGAIN); - if (rc != 0) - break; - - tx->tx_msg.ram_u.putreq.raprm_cookie = tx->tx_cookie; - rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0); - expect_reply = 1; - break; - - case RANAL_MSG_PUT_ACK: - rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0); - expect_reply = 1; - break; - - case RANAL_MSG_GET_REQ: - rc = kranal_map_buffer(tx); - LASSERT (rc != -EAGAIN); - if (rc != 0) - break; - - tx->tx_msg.ram_u.get.ragm_cookie = tx->tx_cookie; - tx->tx_msg.ram_u.get.ragm_desc.rard_key = tx->tx_map_key; - tx->tx_msg.ram_u.get.ragm_desc.rard_addr.AddressBits = - (__u64)((unsigned long)tx->tx_buffer); - tx->tx_msg.ram_u.get.ragm_desc.rard_nob = tx->tx_nob; - rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0); - expect_reply = 1; - break; - } - - if (rc == -EAGAIN) { - /* I need credits to send this. 
Replace tx at the head of the - * fmaq and I'll get rescheduled when credits appear */ - CDEBUG(D_NET, "EAGAIN on %p\n", conn); - spin_lock_irqsave(&conn->rac_lock, flags); - list_add(&tx->tx_list, &conn->rac_fmaq); - spin_unlock_irqrestore(&conn->rac_lock, flags); - return; - } - - if (!expect_reply || rc != 0) { - kranal_tx_done(tx, rc); - } else { - /* LASSERT(current) above ensures this doesn't race with reply - * processing */ - spin_lock_irqsave(&conn->rac_lock, flags); - list_add_tail(&tx->tx_list, &conn->rac_replyq); - tx->tx_qtime = jiffies; - spin_unlock_irqrestore(&conn->rac_lock, flags); - } - - if (more_to_do) { - CDEBUG(D_NET, "Rescheduling %p (more to do)\n", conn); - kranal_schedule_conn(conn); - } -} - -static inline void -kranal_swab_rdma_desc (kra_rdma_desc_t *d) -{ - __swab64s(&d->rard_key.Key); - __swab16s(&d->rard_key.Cookie); - __swab16s(&d->rard_key.MdHandle); - __swab32s(&d->rard_key.Flags); - __swab64s(&d->rard_addr.AddressBits); - __swab32s(&d->rard_nob); -} - -kra_tx_t * -kranal_match_reply(kra_conn_t *conn, int type, __u64 cookie) -{ - struct list_head *ttmp; - kra_tx_t *tx; - unsigned long flags; - - spin_lock_irqsave(&conn->rac_lock, flags); - - list_for_each(ttmp, &conn->rac_replyq) { - tx = list_entry(ttmp, kra_tx_t, tx_list); - - CDEBUG(D_NET,"Checking %p %02x/"LPX64"\n", - tx, tx->tx_msg.ram_type, tx->tx_cookie); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_msg.ram_type != type) { - spin_unlock_irqrestore(&conn->rac_lock, flags); - CWARN("Unexpected type %x (%x expected) " - "matched reply from %s\n", - tx->tx_msg.ram_type, type, - libcfs_nid2str(conn->rac_peer->rap_nid)); - return NULL; - } - - list_del(&tx->tx_list); - spin_unlock_irqrestore(&conn->rac_lock, flags); - return tx; - } - - spin_unlock_irqrestore(&conn->rac_lock, flags); - CWARN("Unmatched reply %02x/"LPX64" from %s\n", - type, cookie, libcfs_nid2str(conn->rac_peer->rap_nid)); - return NULL; -} - -void -kranal_check_fma_rx (kra_conn_t *conn) -{ - 
unsigned long flags; - __u32 seq; - kra_tx_t *tx; - kra_msg_t *msg; - void *prefix; - RAP_RETURN rrc = RapkFmaGetPrefix(conn->rac_rihandle, &prefix); - kra_peer_t *peer = conn->rac_peer; - int rc = 0; - int repost = 1; - - if (rrc == RAP_NOT_DONE) - return; - - CDEBUG(D_NET, "RX on %p\n", conn); - - LASSERT (rrc == RAP_SUCCESS); - conn->rac_last_rx = jiffies; - seq = conn->rac_rx_seq++; - msg = (kra_msg_t *)prefix; - - /* stash message for portals callbacks they'll NULL - * rac_rxmsg if they consume it */ - LASSERT (conn->rac_rxmsg == NULL); - conn->rac_rxmsg = msg; - - if (msg->ram_magic != RANAL_MSG_MAGIC) { - if (__swab32(msg->ram_magic) != RANAL_MSG_MAGIC) { - CERROR("Unexpected magic %08x from %s\n", - msg->ram_magic, libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - __swab32s(&msg->ram_magic); - __swab16s(&msg->ram_version); - __swab16s(&msg->ram_type); - __swab64s(&msg->ram_srcnid); - __swab64s(&msg->ram_connstamp); - __swab32s(&msg->ram_seq); - - /* NB message type checked below; NOT here... 
*/ - switch (msg->ram_type) { - case RANAL_MSG_PUT_ACK: - kranal_swab_rdma_desc(&msg->ram_u.putack.rapam_desc); - break; - - case RANAL_MSG_GET_REQ: - kranal_swab_rdma_desc(&msg->ram_u.get.ragm_desc); - break; - - default: - break; - } - } - - if (msg->ram_version != RANAL_MSG_VERSION) { - CERROR("Unexpected protocol version %d from %s\n", - msg->ram_version, libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if (msg->ram_srcnid != peer->rap_nid) { - CERROR("Unexpected peer %s from %s\n", - libcfs_nid2str(msg->ram_srcnid), - libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if (msg->ram_connstamp != conn->rac_peer_connstamp) { - CERROR("Unexpected connstamp "LPX64"("LPX64 - " expected) from %s\n", - msg->ram_connstamp, conn->rac_peer_connstamp, - libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if (msg->ram_seq != seq) { - CERROR("Unexpected sequence number %d(%d expected) from %s\n", - msg->ram_seq, seq, libcfs_nid2str(peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if ((msg->ram_type & RANAL_MSG_FENCE) != 0) { - /* This message signals RDMA completion... 
*/ - rrc = RapkFmaSyncWait(conn->rac_rihandle); - if (rrc != RAP_SUCCESS) { - CERROR("RapkFmaSyncWait failed: %d\n", rrc); - rc = -ENETDOWN; - goto out; - } - } - - if (conn->rac_close_recvd) { - CERROR("Unexpected message %d after CLOSE from %s\n", - msg->ram_type, libcfs_nid2str(conn->rac_peer->rap_nid)); - rc = -EPROTO; - goto out; - } - - if (msg->ram_type == RANAL_MSG_CLOSE) { - CWARN("RX CLOSE from %s\n", libcfs_nid2str(conn->rac_peer->rap_nid)); - conn->rac_close_recvd = 1; - write_lock_irqsave(&kranal_data.kra_global_lock, flags); - - if (conn->rac_state == RANAL_CONN_ESTABLISHED) - kranal_close_conn_locked(conn, 0); - else if (conn->rac_state == RANAL_CONN_CLOSING && - conn->rac_close_sent) - kranal_terminate_conn_locked(conn); - - write_unlock_irqrestore(&kranal_data.kra_global_lock, flags); - goto out; - } - - if (conn->rac_state != RANAL_CONN_ESTABLISHED) - goto out; - - switch (msg->ram_type) { - case RANAL_MSG_NOOP: - /* Nothing to do; just a keepalive */ - CDEBUG(D_NET, "RX NOOP on %p\n", conn); - break; - - case RANAL_MSG_IMMEDIATE: - CDEBUG(D_NET, "RX IMMEDIATE on %p\n", conn); - rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.immediate.raim_hdr, - msg->ram_srcnid, conn, 0); - repost = rc < 0; - break; - - case RANAL_MSG_PUT_REQ: - CDEBUG(D_NET, "RX PUT_REQ on %p\n", conn); - rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.putreq.raprm_hdr, - msg->ram_srcnid, conn, 1); - repost = rc < 0; - break; - - case RANAL_MSG_PUT_NAK: - CDEBUG(D_NET, "RX PUT_NAK on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ, - msg->ram_u.completion.racm_cookie); - if (tx == NULL) - break; - - LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED || - tx->tx_buftype == RANAL_BUF_VIRT_MAPPED); - kranal_tx_done(tx, -ENOENT); /* no match */ - break; - - case RANAL_MSG_PUT_ACK: - CDEBUG(D_NET, "RX PUT_ACK on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ, - msg->ram_u.putack.rapam_src_cookie); - if (tx == NULL) - break; - - kranal_rdma(tx, 
RANAL_MSG_PUT_DONE, - &msg->ram_u.putack.rapam_desc, - msg->ram_u.putack.rapam_desc.rard_nob, - msg->ram_u.putack.rapam_dst_cookie); - break; - - case RANAL_MSG_PUT_DONE: - CDEBUG(D_NET, "RX PUT_DONE on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_PUT_ACK, - msg->ram_u.completion.racm_cookie); - if (tx == NULL) - break; - - LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED || - tx->tx_buftype == RANAL_BUF_VIRT_MAPPED); - kranal_tx_done(tx, 0); - break; - - case RANAL_MSG_GET_REQ: - CDEBUG(D_NET, "RX GET_REQ on %p\n", conn); - rc = lnet_parse(kranal_data.kra_ni, &msg->ram_u.get.ragm_hdr, - msg->ram_srcnid, conn, 1); - repost = rc < 0; - break; - - case RANAL_MSG_GET_NAK: - CDEBUG(D_NET, "RX GET_NAK on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ, - msg->ram_u.completion.racm_cookie); - if (tx == NULL) - break; - - LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED || - tx->tx_buftype == RANAL_BUF_VIRT_MAPPED); - kranal_tx_done(tx, -ENOENT); /* no match */ - break; - - case RANAL_MSG_GET_DONE: - CDEBUG(D_NET, "RX GET_DONE on %p\n", conn); - tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ, - msg->ram_u.completion.racm_cookie); - if (tx == NULL) - break; - - LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED || - tx->tx_buftype == RANAL_BUF_VIRT_MAPPED); -#if 0 - /* completion message should send rdma length if we ever allow - * GET truncation */ - lnet_set_reply_msg_len(kranal_data.kra_ni, tx->tx_lntmsg[1], ???); -#endif - kranal_tx_done(tx, 0); - break; - } - - out: - if (rc < 0) /* protocol/comms error */ - kranal_close_conn (conn, rc); - - if (repost && conn->rac_rxmsg != NULL) - kranal_consume_rxmsg(conn, NULL, 0); - - /* check again later */ - kranal_schedule_conn(conn); -} - -void -kranal_complete_closed_conn (kra_conn_t *conn) -{ - kra_tx_t *tx; - int nfma; - int nreplies; - - LASSERT (conn->rac_state == RANAL_CONN_CLOSED); - LASSERT (list_empty(&conn->rac_list)); - LASSERT (list_empty(&conn->rac_hashlist)); - - for (nfma = 0; 
!list_empty(&conn->rac_fmaq); nfma++) { - tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list); - - list_del(&tx->tx_list); - kranal_tx_done(tx, -ECONNABORTED); - } - - LASSERT (list_empty(&conn->rac_rdmaq)); - - for (nreplies = 0; !list_empty(&conn->rac_replyq); nreplies++) { - tx = list_entry(conn->rac_replyq.next, kra_tx_t, tx_list); - - list_del(&tx->tx_list); - kranal_tx_done(tx, -ECONNABORTED); - } - - CWARN("Closed conn %p -> %s: nmsg %d nreplies %d\n", - conn, libcfs_nid2str(conn->rac_peer->rap_nid), nfma, nreplies); -} - -int -kranal_process_new_conn (kra_conn_t *conn) -{ - RAP_RETURN rrc; - - rrc = RapkCompleteSync(conn->rac_rihandle, 1); - if (rrc == RAP_SUCCESS) - return 0; - - LASSERT (rrc == RAP_NOT_DONE); - if (!time_after_eq(jiffies, conn->rac_last_tx + - conn->rac_timeout * HZ)) - return -EAGAIN; - - /* Too late */ - rrc = RapkCompleteSync(conn->rac_rihandle, 0); - LASSERT (rrc == RAP_SUCCESS); - return -ETIMEDOUT; -} - -int -kranal_scheduler (void *arg) -{ - kra_device_t *dev = (kra_device_t *)arg; - wait_queue_t wait; - char name[16]; - kra_conn_t *conn; - unsigned long flags; - unsigned long deadline; - unsigned long soonest; - int nsoonest; - long timeout; - struct list_head *tmp; - struct list_head *nxt; - int rc; - int dropped_lock; - int busy_loops = 0; - - snprintf(name, sizeof(name), "kranal_sd_%02d", dev->rad_idx); - cfs_daemonize(name); - cfs_block_allsigs(); - - dev->rad_scheduler = current; - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&dev->rad_lock, flags); - - while (!kranal_data.kra_shutdown) { - /* Safe: kra_shutdown only set when quiescent */ - - if (busy_loops++ >= RANAL_RESCHED) { - spin_unlock_irqrestore(&dev->rad_lock, flags); - - our_cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&dev->rad_lock, flags); - } - - dropped_lock = 0; - - if (dev->rad_ready) { - /* Device callback fired since I last checked it */ - dev->rad_ready = 0; - spin_unlock_irqrestore(&dev->rad_lock, flags); - dropped_lock = 1; 
- - kranal_check_rdma_cq(dev); - kranal_check_fma_cq(dev); - - spin_lock_irqsave(&dev->rad_lock, flags); - } - - list_for_each_safe(tmp, nxt, &dev->rad_ready_conns) { - conn = list_entry(tmp, kra_conn_t, rac_schedlist); - - list_del_init(&conn->rac_schedlist); - LASSERT (conn->rac_scheduled); - conn->rac_scheduled = 0; - spin_unlock_irqrestore(&dev->rad_lock, flags); - dropped_lock = 1; - - kranal_check_fma_rx(conn); - kranal_process_fmaq(conn); - - if (conn->rac_state == RANAL_CONN_CLOSED) - kranal_complete_closed_conn(conn); - - kranal_conn_decref(conn); - spin_lock_irqsave(&dev->rad_lock, flags); - } - - nsoonest = 0; - soonest = jiffies; - - list_for_each_safe(tmp, nxt, &dev->rad_new_conns) { - conn = list_entry(tmp, kra_conn_t, rac_schedlist); - - deadline = conn->rac_last_tx + conn->rac_keepalive; - if (time_after_eq(jiffies, deadline)) { - /* Time to process this new conn */ - spin_unlock_irqrestore(&dev->rad_lock, flags); - dropped_lock = 1; - - rc = kranal_process_new_conn(conn); - if (rc != -EAGAIN) { - /* All done with this conn */ - spin_lock_irqsave(&dev->rad_lock, flags); - list_del_init(&conn->rac_schedlist); - spin_unlock_irqrestore(&dev->rad_lock, flags); - - kranal_conn_decref(conn); - spin_lock_irqsave(&dev->rad_lock, flags); - continue; - } - - /* retry with exponential backoff until HZ */ - if (conn->rac_keepalive == 0) - conn->rac_keepalive = 1; - else if (conn->rac_keepalive <= HZ) - conn->rac_keepalive *= 2; - else - conn->rac_keepalive += HZ; - - deadline = conn->rac_last_tx + conn->rac_keepalive; - spin_lock_irqsave(&dev->rad_lock, flags); - } - - /* Does this conn need attention soonest? 
*/ - if (nsoonest++ == 0 || - !time_after_eq(deadline, soonest)) - soonest = deadline; - } - - if (dropped_lock) /* may sleep iff I didn't drop the lock */ - continue; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&dev->rad_waitq, &wait); - spin_unlock_irqrestore(&dev->rad_lock, flags); - - if (nsoonest == 0) { - busy_loops = 0; - schedule(); - } else { - timeout = (long)(soonest - jiffies); - if (timeout > 0) { - busy_loops = 0; - schedule_timeout(timeout); - } - } - - remove_wait_queue(&dev->rad_waitq, &wait); - set_current_state(TASK_RUNNING); - spin_lock_irqsave(&dev->rad_lock, flags); - } - - spin_unlock_irqrestore(&dev->rad_lock, flags); - - dev->rad_scheduler = NULL; - kranal_thread_fini(); - return 0; -} diff --git a/lnet/klnds/ralnd/ralnd_modparams.c b/lnet/klnds/ralnd/ralnd_modparams.c deleted file mode 100644 index 45f42e1327099417667a184215a3198e8b737684..0000000000000000000000000000000000000000 --- a/lnet/klnds/ralnd/ralnd_modparams.c +++ /dev/null @@ -1,135 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "ralnd.h" - -static int n_connd = 4; -CFS_MODULE_PARM(n_connd, "i", int, 0444, - "# of connection daemons"); - -static int min_reconnect_interval = 1; -CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644, - "minimum connection retry interval (seconds)"); - -static int max_reconnect_interval = 60; -CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644, - "maximum connection retry interval (seconds)"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of transmit descriptors"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 32; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int fma_cq_size = 8192; -CFS_MODULE_PARM(fma_cq_size, "i", int, 0444, - "size of the completion queue"); - -static int timeout = 30; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "communications timeout (seconds)"); - -static int max_immediate = (2<<10); -CFS_MODULE_PARM(max_immediate, "i", int, 0644, - "immediate/RDMA breakpoint"); - -kra_tunables_t kranal_tunables = { - .kra_n_connd = &n_connd, - .kra_min_reconnect_interval = &min_reconnect_interval, - .kra_max_reconnect_interval = &max_reconnect_interval, - .kra_ntx = &ntx, - .kra_credits = &credits, - .kra_peercredits = &peer_credits, - .kra_fma_cq_size = &fma_cq_size, - .kra_timeout = &timeout, - .kra_max_immediate = &max_immediate, -}; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -static ctl_table kranal_ctl_table[] = { - {1, "n_connd", &n_connd, - sizeof(int), 0444, NULL, &proc_dointvec}, - {2, "min_reconnect_interval", &min_reconnect_interval, - sizeof(int), 0644, NULL, &proc_dointvec}, - {3, "max_reconnect_interval", &max_reconnect_interval, - sizeof(int), 0644, NULL, &proc_dointvec}, - {4, "ntx", &ntx, - sizeof(int), 0444, NULL, &proc_dointvec}, - {5, "credits", &credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {6, "peer_credits", &peer_credits, - sizeof(int), 0444, 
NULL, &proc_dointvec}, - {7, "fma_cq_size", &fma_cq_size, - sizeof(int), 0444, NULL, &proc_dointvec}, - {8, "timeout", &timeout, - sizeof(int), 0644, NULL, &proc_dointvec}, - {9, "max_immediate", &max_immediate, - sizeof(int), 0644, NULL, &proc_dointvec}, - {0} -}; - -static ctl_table kranal_top_ctl_table[] = { - {202, "ranal", NULL, 0, 0555, kranal_ctl_table}, - {0} -}; - -int -kranal_tunables_init () -{ - kranal_tunables.kra_sysctl = - register_sysctl_table(kranal_top_ctl_table, 0); - - if (kranal_tunables.kra_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kranal_tunables_fini () -{ - if (kranal_tunables.kra_sysctl != NULL) - unregister_sysctl_table(kranal_tunables.kra_sysctl); -} - -#else - -int -kranal_tunables_init () -{ - return 0; -} - -void -kranal_tunables_fini () -{ -} - -#endif - diff --git a/lnet/klnds/socklnd/.cvsignore b/lnet/klnds/socklnd/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/klnds/socklnd/Info.plist b/lnet/klnds/socklnd/Info.plist deleted file mode 100644 index f5a5460ad280753fcd0c15f66e70428143676416..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/Info.plist +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>English</string> - <key>CFBundleExecutable</key> - <string>ksocklnd</string> - <key>CFBundleIconFile</key> - <string></string> - <key>CFBundleIdentifier</key> - <string>com.clusterfs.lustre.ksocklnd</string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - <key>CFBundlePackageType</key> - 
<string>KEXT</string> - <key>CFBundleSignature</key> - <string>????</string> - <key>CFBundleVersion</key> - <string>1.0.1</string> - <key>OSBundleCompatibleVersion</key> - <string>1.0.0</string> - <key>OSBundleLibraries</key> - <dict> - <key>com.apple.kpi.bsd</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.libkern</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.mach</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.unsupported</key> - <string>8.0.0b1</string> - <key>com.clusterfs.lustre.libcfs</key> - <string>1.0.0</string> - <key>com.clusterfs.lustre.lnet</key> - <string>1.0.0</string> - </dict> -</dict> -</plist> diff --git a/lnet/klnds/socklnd/Makefile.in b/lnet/klnds/socklnd/Makefile.in deleted file mode 100644 index 3a6c3f78043a9108a9fc0cc1c0df5ed763149d46..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/Makefile.in +++ /dev/null @@ -1,5 +0,0 @@ -MODULES := ksocklnd - -ksocklnd-objs := socklnd.o socklnd_cb.o socklnd_modparams.o socklnd_lib-linux.o - -@INCLUDE_RULES@ diff --git a/lnet/klnds/socklnd/autoMakefile.am b/lnet/klnds/socklnd/autoMakefile.am deleted file mode 100644 index 0dbe69756ca6f4383fe31b80714bb909d4e303ff..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/autoMakefile.am +++ /dev/null @@ -1,30 +0,0 @@ -if MODULES -if LINUX - - modulenet_DATA := ksocklnd$(KMODEXT) - -endif # LINUX -endif # MODULES - -DIST_SOURCES := $(ksocklnd-objs:%.o=%.c) socklnd_lib-linux.h socklnd.h - -if DARWIN - - macos_PROGRAMS := ksocklnd - - nodist_ksocklnd_SOURCES := socklnd.c socklnd_cb.c socklnd_modparams.c socklnd_lib-darwin.c - DIST_SOURCES += socklnd_lib-darwin.c socklnd_lib-darwin.h - - ksocklnd_CFLAGS := $(EXTRA_KCFLAGS) - ksocklnd_LDFLAGS := $(EXTRA_KLDFLAGS) - ksocklnd_LDADD := $(EXTRA_KLIBS) - - plist_DATA := Info.plist - install_data_hook := fix-kext-ownership - -endif # DARWIN - -EXTRA_DIST := $(plist_DATA) -install-data-hook: $(install_data_hook) - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ socklnd_lib.c diff 
--git a/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj b/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj deleted file mode 100644 index cab8b433f3a1b3509794dd72a68307b8ebed8b0d..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj +++ /dev/null @@ -1,287 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = ksocknal; - refType = 4; - sourceTree = "<group>"; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = "<group>"; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 1957C5680737C71F00425049 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = socknal.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 1957C5690737C71F00425049 = { - fileRef = 
1957C5680737C71F00425049; - isa = PBXBuildFile; - settings = { - }; - }; - 1957C56A0737C72F00425049 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = socknal_cb.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 1957C56B0737C72F00425049 = { - fileRef = 1957C56A0737C72F00425049; - isa = PBXBuildFile; - settings = { - }; - }; - 1957C5B20737C78E00425049 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = socknal_lib.c; - path = arch/xnu/socknal_lib.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 1957C5B30737C78E00425049 = { - fileRef = 1957C5B20737C78E00425049; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = "<group>"; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 1957C5B20737C78E00425049, - 1957C56A0737C72F00425049, - 1957C5680737C71F00425049, - ); - isa = PBXGroup; - name = Source; - path = ""; - refType = 4; - sourceTree = "<group>"; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, - 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = "../../include ./arch/xnu"; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.knals.ksocknal; - MODULE_START = ksocknal_start; - MODULE_STOP = ksocknal_stop; - MODULE_VERSION = 1.0.1; - OTHER_CFLAGS = "-D__KERNEL__"; - 
OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = ksocknal; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = ksocknal; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = ksocknal; - productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 1957C5690737C71F00425049, - 1957C56B0737C72F00425049, - 1957C5B30737C78E00425049, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = "<group>"; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = ksocknal.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c deleted file mode 100644 index 8903f105bf79be6c2f778851849f7831dde18006..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd.c +++ /dev/null @@ -1,2510 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "socklnd.h" - -lnd_t the_ksocklnd = { - .lnd_type = SOCKLND, - .lnd_startup = ksocknal_startup, - .lnd_shutdown = ksocknal_shutdown, - .lnd_ctl = ksocknal_ctl, - .lnd_send = ksocknal_send, - .lnd_recv = ksocknal_recv, - .lnd_notify = ksocknal_notify, - .lnd_accept = ksocknal_accept, -}; - -ksock_nal_data_t ksocknal_data; - -ksock_interface_t * -ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip) -{ - ksock_net_t *net = ni->ni_data; - int i; - ksock_interface_t *iface; - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT(i < LNET_MAX_INTERFACES); - iface = &net->ksnn_interfaces[i]; - - if (iface->ksni_ipaddr == ip) - return (iface); - } - - return (NULL); -} - -ksock_route_t * -ksocknal_create_route (__u32 ipaddr, int port) -{ - ksock_route_t *route; - - LIBCFS_ALLOC (route, sizeof (*route)); - if (route == NULL) - return (NULL); - - atomic_set (&route->ksnr_refcount, 1); - route->ksnr_peer = NULL; - route->ksnr_retry_interval = 0; /* OK to connect at any time */ - route->ksnr_ipaddr = ipaddr; - route->ksnr_port = port; - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - route->ksnr_connected = 0; - route->ksnr_deleted = 0; - route->ksnr_conn_count = 0; - route->ksnr_share_count = 0; - route->ksnr_proto = &ksocknal_protocol_v2x; - - return (route); -} - -void -ksocknal_destroy_route (ksock_route_t *route) -{ - LASSERT (atomic_read(&route->ksnr_refcount) == 0); - - if (route->ksnr_peer != NULL) - ksocknal_peer_decref(route->ksnr_peer); - - LIBCFS_FREE (route, sizeof (*route)); -} - -int -ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id) -{ - ksock_net_t *net = ni->ni_data; - ksock_peer_t *peer; - - LASSERT (id.nid != LNET_NID_ANY); - LASSERT (id.pid != LNET_PID_ANY); - LASSERT (!in_interrupt()); - - LIBCFS_ALLOC (peer, sizeof (*peer)); - if (peer == NULL) - return -ENOMEM; - - memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */ - - peer->ksnp_ni = ni; - peer->ksnp_id = id; - atomic_set 
(&peer->ksnp_refcount, 1); /* 1 ref for caller */ - peer->ksnp_closing = 0; - peer->ksnp_accepting = 0; - peer->ksnp_zc_next_cookie = 1; - CFS_INIT_LIST_HEAD (&peer->ksnp_conns); - CFS_INIT_LIST_HEAD (&peer->ksnp_routes); - CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue); - CFS_INIT_LIST_HEAD (&peer->ksnp_zc_req_list); - spin_lock_init(&peer->ksnp_lock); - - spin_lock_bh (&net->ksnn_lock); - - if (net->ksnn_shutdown) { - spin_unlock_bh (&net->ksnn_lock); - - LIBCFS_FREE(peer, sizeof(*peer)); - CERROR("Can't create peer: network shutdown\n"); - return -ESHUTDOWN; - } - - net->ksnn_npeers++; - - spin_unlock_bh (&net->ksnn_lock); - - *peerp = peer; - return 0; -} - -void -ksocknal_destroy_peer (ksock_peer_t *peer) -{ - ksock_net_t *net = peer->ksnp_ni->ni_data; - - CDEBUG (D_NET, "peer %s %p deleted\n", - libcfs_id2str(peer->ksnp_id), peer); - - LASSERT (atomic_read (&peer->ksnp_refcount) == 0); - LASSERT (peer->ksnp_accepting == 0); - LASSERT (list_empty (&peer->ksnp_conns)); - LASSERT (list_empty (&peer->ksnp_routes)); - LASSERT (list_empty (&peer->ksnp_tx_queue)); - LASSERT (list_empty (&peer->ksnp_zc_req_list)); - - LIBCFS_FREE (peer, sizeof (*peer)); - - /* NB a peer's connections and routes keep a reference on their peer - * until they are destroyed, so we can be assured that _all_ state to - * do with this peer has been cleaned up when its refcount drops to - * zero. 
*/ - spin_lock_bh (&net->ksnn_lock); - net->ksnn_npeers--; - spin_unlock_bh (&net->ksnn_lock); -} - -ksock_peer_t * -ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id) -{ - struct list_head *peer_list = ksocknal_nid2peerlist(id.nid); - struct list_head *tmp; - ksock_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, ksock_peer_t, ksnp_list); - - LASSERT (!peer->ksnp_closing); - - if (peer->ksnp_ni != ni) - continue; - - if (peer->ksnp_id.nid != id.nid || - peer->ksnp_id.pid != id.pid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_id2str(id), - atomic_read(&peer->ksnp_refcount)); - return (peer); - } - return (NULL); -} - -ksock_peer_t * -ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id) -{ - ksock_peer_t *peer; - - read_lock (&ksocknal_data.ksnd_global_lock); - peer = ksocknal_find_peer_locked (ni, id); - if (peer != NULL) /* +1 ref for caller? */ - ksocknal_peer_addref(peer); - read_unlock (&ksocknal_data.ksnd_global_lock); - - return (peer); -} - -void -ksocknal_unlink_peer_locked (ksock_peer_t *peer) -{ - int i; - __u32 ip; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) { - LASSERT (i < LNET_MAX_INTERFACES); - ip = peer->ksnp_passive_ips[i]; - - ksocknal_ip2iface(peer->ksnp_ni, ip)->ksni_npeers--; - } - - LASSERT (list_empty(&peer->ksnp_conns)); - LASSERT (list_empty(&peer->ksnp_routes)); - LASSERT (!peer->ksnp_closing); - peer->ksnp_closing = 1; - list_del (&peer->ksnp_list); - /* lose peerlist's ref */ - ksocknal_peer_decref(peer); -} - -int -ksocknal_get_peer_info (lnet_ni_t *ni, int index, - lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port, - int *conn_count, int *share_count) -{ - ksock_peer_t *peer; - struct list_head *ptmp; - ksock_route_t *route; - struct list_head *rtmp; - int i; - int j; - int rc = -ENOENT; - - read_lock (&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - - list_for_each (ptmp, 
&ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - if (peer->ksnp_n_passive_ips == 0 && - list_empty(&peer->ksnp_routes)) { - if (index-- > 0) - continue; - - *id = peer->ksnp_id; - *myip = 0; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - - for (j = 0; j < peer->ksnp_n_passive_ips; j++) { - if (index-- > 0) - continue; - - *id = peer->ksnp_id; - *myip = peer->ksnp_passive_ips[j]; - *peer_ip = 0; - *port = 0; - *conn_count = 0; - *share_count = 0; - rc = 0; - goto out; - } - - list_for_each (rtmp, &peer->ksnp_routes) { - if (index-- > 0) - continue; - - route = list_entry(rtmp, ksock_route_t, - ksnr_list); - - *id = peer->ksnp_id; - *myip = route->ksnr_myipaddr; - *peer_ip = route->ksnr_ipaddr; - *port = route->ksnr_port; - *conn_count = route->ksnr_conn_count; - *share_count = route->ksnr_share_count; - rc = 0; - goto out; - } - } - } - out: - read_unlock (&ksocknal_data.ksnd_global_lock); - return (rc); -} - -void -ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) -{ - ksock_peer_t *peer = route->ksnr_peer; - int type = conn->ksnc_type; - ksock_interface_t *iface; - - conn->ksnc_route = route; - ksocknal_route_addref(route); - - if (route->ksnr_myipaddr != conn->ksnc_myipaddr) { - if (route->ksnr_myipaddr == 0) { - /* route wasn't bound locally yet (the initial route) */ - CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(conn->ksnc_myipaddr)); - } else { - CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from " - "%u.%u.%u.%u to %u.%u.%u.%u\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(route->ksnr_myipaddr), - HIPQUAD(conn->ksnc_myipaddr)); - - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes--; - } - route->ksnr_myipaddr = 
conn->ksnc_myipaddr; - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if (iface != NULL) - iface->ksni_nroutes++; - } - - route->ksnr_connected |= (1<<type); - route->ksnr_conn_count++; - - /* Successful connection => further attempts can - * proceed immediately */ - route->ksnr_retry_interval = 0; -} - -void -ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) -{ - struct list_head *tmp; - ksock_conn_t *conn; - ksock_route_t *route2; - - LASSERT (!peer->ksnp_closing); - LASSERT (route->ksnr_peer == NULL); - LASSERT (!route->ksnr_scheduled); - LASSERT (!route->ksnr_connecting); - LASSERT (route->ksnr_connected == 0); - - /* LASSERT(unique) */ - list_for_each(tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); - - if (route2->ksnr_ipaddr == route->ksnr_ipaddr) { - CERROR ("Duplicate route %s %u.%u.%u.%u\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(route->ksnr_ipaddr)); - LBUG(); - } - } - - route->ksnr_peer = peer; - ksocknal_peer_addref(peer); - /* peer's routelist takes over my ref on 'route' */ - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - - list_for_each(tmp, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_ipaddr != route->ksnr_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - /* keep going (typed routes) */ - } -} - -void -ksocknal_del_route_locked (ksock_route_t *route) -{ - ksock_peer_t *peer = route->ksnr_peer; - ksock_interface_t *iface; - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - - LASSERT (!route->ksnr_deleted); - - /* Close associated conns */ - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry(ctmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_route != route) - continue; - - ksocknal_close_conn_locked (conn, 0); - } - - if (route->ksnr_myipaddr != 0) { - iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni, - route->ksnr_myipaddr); - if 
(iface != NULL) - iface->ksni_nroutes--; - } - - route->ksnr_deleted = 1; - list_del (&route->ksnr_list); - ksocknal_route_decref(route); /* drop peer's ref */ - - if (list_empty (&peer->ksnp_routes) && - list_empty (&peer->ksnp_conns)) { - /* I've just removed the last route to a peer with no active - * connections */ - ksocknal_unlink_peer_locked (peer); - } -} - -int -ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port) -{ - struct list_head *tmp; - ksock_peer_t *peer; - ksock_peer_t *peer2; - ksock_route_t *route; - ksock_route_t *route2; - int rc; - - if (id.nid == LNET_NID_ANY || - id.pid == LNET_PID_ANY) - return (-EINVAL); - - /* Have a brand new peer ready... */ - rc = ksocknal_create_peer(&peer, ni, id); - if (rc != 0) - return rc; - - route = ksocknal_create_route (ipaddr, port); - if (route == NULL) { - ksocknal_peer_decref(peer); - return (-ENOMEM); - } - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - peer2 = ksocknal_find_peer_locked (ni, id); - if (peer2 != NULL) { - ksocknal_peer_decref(peer); - peer = peer2; - } else { - /* peer table takes my ref on peer */ - list_add_tail (&peer->ksnp_list, - ksocknal_nid2peerlist (id.nid)); - } - - route2 = NULL; - list_for_each (tmp, &peer->ksnp_routes) { - route2 = list_entry(tmp, ksock_route_t, ksnr_list); - - if (route2->ksnr_ipaddr == ipaddr) - break; - - route2 = NULL; - } - if (route2 == NULL) { - ksocknal_add_route_locked(peer, route); - route->ksnr_share_count++; - } else { - ksocknal_route_decref(route); - route2->ksnr_share_count++; - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - return (0); -} - -void -ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip) -{ - ksock_conn_t *conn; - ksock_route_t *route; - struct list_head *tmp; - struct list_head *nxt; - int nshared; - - LASSERT (!peer->ksnp_closing); - - /* Extra ref prevents peer disappearing until I'm done with it */ - ksocknal_peer_addref(peer); - - list_for_each_safe (tmp, nxt, 
&peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - - /* no match */ - if (!(ip == 0 || route->ksnr_ipaddr == ip)) - continue; - - route->ksnr_share_count = 0; - /* This deletes associated conns too */ - ksocknal_del_route_locked (route); - } - - nshared = 0; - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - nshared += route->ksnr_share_count; - } - - if (nshared == 0) { - /* remove everything else if there are no explicit entries - * left */ - - list_for_each_safe (tmp, nxt, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - - /* we should only be removing auto-entries */ - LASSERT(route->ksnr_share_count == 0); - ksocknal_del_route_locked (route); - } - - list_for_each_safe (tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - ksocknal_close_conn_locked(conn, 0); - } - } - - ksocknal_peer_decref(peer); - /* NB peer unlinks itself when last conn/route is removed */ -} - -int -ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip) -{ - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - ksock_peer_t *peer; - int lo; - int hi; - int i; - int rc = -ENOENT; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - if (id.nid != LNET_NID_ANY) - lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers; - else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) && - (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid))) - continue; - - ksocknal_peer_addref(peer); /* a ref for me... 
*/ - - ksocknal_del_peer_locked (peer, ip); - - if (peer->ksnp_closing && !list_empty(&peer->ksnp_tx_queue)) { - LASSERT (list_empty(&peer->ksnp_conns)); - LASSERT (list_empty(&peer->ksnp_routes)); - - list_splice_init(&peer->ksnp_tx_queue, &zombies); - } - - ksocknal_peer_decref(peer); /* ...till here */ - - rc = 0; /* matched! */ - } - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - ksocknal_txlist_done(ni, &zombies, 1); - - return (rc); -} - -ksock_conn_t * -ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index) -{ - ksock_peer_t *peer; - struct list_head *ptmp; - ksock_conn_t *conn; - struct list_head *ctmp; - int i; - - read_lock (&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - - LASSERT (!peer->ksnp_closing); - - if (peer->ksnp_ni != ni) - continue; - - list_for_each (ctmp, &peer->ksnp_conns) { - if (index-- > 0) - continue; - - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - ksocknal_conn_addref(conn); - read_unlock (&ksocknal_data.ksnd_global_lock); - return (conn); - } - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - return (NULL); -} - -ksock_sched_t * -ksocknal_choose_scheduler_locked (unsigned int irq) -{ - ksock_sched_t *sched; - ksock_irqinfo_t *info; - int i; - - LASSERT (irq < NR_IRQS); - info = &ksocknal_data.ksnd_irqinfo[irq]; - - if (irq != 0 && /* hardware NIC */ - info->ksni_valid) { /* already set up */ - return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]); - } - - /* software NIC (irq == 0) || not associated with a scheduler yet. - * Choose the CPU with the fewest connections... 
*/ - sched = &ksocknal_data.ksnd_schedulers[0]; - for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++) - if (sched->kss_nconns > - ksocknal_data.ksnd_schedulers[i].kss_nconns) - sched = &ksocknal_data.ksnd_schedulers[i]; - - if (irq != 0) { /* Hardware NIC */ - info->ksni_valid = 1; - info->ksni_sched = sched - ksocknal_data.ksnd_schedulers; - - /* no overflow... */ - LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers); - } - - return (sched); -} - -int -ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs) -{ - ksock_net_t *net = ni->ni_data; - int i; - int nip; - - read_lock (&ksocknal_data.ksnd_global_lock); - - nip = net->ksnn_ninterfaces; - LASSERT (nip < LNET_MAX_INTERFACES); - - /* Only offer interfaces for additional connections if I have - * more than one. */ - if (nip < 2) { - read_unlock (&ksocknal_data.ksnd_global_lock); - return 0; - } - - for (i = 0; i < nip; i++) { - ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr; - LASSERT (ipaddrs[i] != 0); - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - return (nip); -} - -int -ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips) -{ - int best_netmatch = 0; - int best_xor = 0; - int best = -1; - int this_xor; - int this_netmatch; - int i; - - for (i = 0; i < nips; i++) { - if (ips[i] == 0) - continue; - - this_xor = (ips[i] ^ iface->ksni_ipaddr); - this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 
1 : 0; - - if (!(best < 0 || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_xor > this_xor))) - continue; - - best = i; - best_netmatch = this_netmatch; - best_xor = this_xor; - } - - LASSERT (best >= 0); - return (best); -} - -int -ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - ksock_net_t *net = peer->ksnp_ni->ni_data; - ksock_interface_t *iface; - ksock_interface_t *best_iface; - int n_ips; - int i; - int j; - int k; - __u32 ip; - __u32 xor; - int this_netmatch; - int best_netmatch; - int best_npeers; - - /* CAVEAT EMPTOR: We do all our interface matching with an - * exclusive hold of global lock at IRQ priority. We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness shouldn't matter */ - - /* Also note that I'm not going to return more than n_peerips - * interfaces, even if I have more myself */ - - write_lock_bh (global_lock); - - LASSERT (n_peerips <= LNET_MAX_INTERFACES); - LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - - /* Only match interfaces for additional connections - * if I have > 1 interface */ - n_ips = (net->ksnn_ninterfaces < 2) ? 0 : - MIN(n_peerips, net->ksnn_ninterfaces); - - for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) { - /* ^ yes really... */ - - /* If we have any new interfaces, first tick off all the - * peer IPs that match old interfaces, then choose new - * interfaces to match the remaining peer IPS. - * We don't forget interfaces we've stopped using; we might - * start using them again... */ - - if (i < peer->ksnp_n_passive_ips) { - /* Old interface. 
*/ - ip = peer->ksnp_passive_ips[i]; - best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip); - - /* peer passive ips are kept up to date */ - LASSERT(best_iface != NULL); - } else { - /* choose a new interface */ - LASSERT (i == peer->ksnp_n_passive_ips); - - best_iface = NULL; - best_netmatch = 0; - best_npeers = 0; - - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - ip = iface->ksni_ipaddr; - - for (k = 0; k < peer->ksnp_n_passive_ips; k++) - if (peer->ksnp_passive_ips[k] == ip) - break; - - if (k < peer->ksnp_n_passive_ips) /* using it already */ - continue; - - k = ksocknal_match_peerip(iface, peerips, n_peerips); - xor = (ip ^ peerips[k]); - this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0; - - if (!(best_iface == NULL || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_npeers > iface->ksni_npeers))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_npeers = iface->ksni_npeers; - } - - best_iface->ksni_npeers++; - ip = best_iface->ksni_ipaddr; - peer->ksnp_passive_ips[i] = ip; - peer->ksnp_n_passive_ips = i+1; - } - - LASSERT (best_iface != NULL); - - /* mark the best matching peer IP used */ - j = ksocknal_match_peerip(best_iface, peerips, n_peerips); - peerips[j] = 0; - } - - /* Overwrite input peer IP addresses */ - memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips)); - - write_unlock_bh (global_lock); - - return (n_ips); -} - -void -ksocknal_create_routes(ksock_peer_t *peer, int port, - __u32 *peer_ipaddrs, int npeer_ipaddrs) -{ - ksock_route_t *newroute = NULL; - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - lnet_ni_t *ni = peer->ksnp_ni; - ksock_net_t *net = ni->ni_data; - struct list_head *rtmp; - ksock_route_t *route; - ksock_interface_t *iface; - ksock_interface_t *best_iface; - int best_netmatch; - int this_netmatch; - int best_nroutes; - int i; - int j; - - /* CAVEAT EMPTOR: We do all our interface matching with an - * 
exclusive hold of global lock at IRQ priority. We're only - * expecting to be dealing with small numbers of interfaces, so the - * O(n**3)-ness here shouldn't matter */ - - write_lock_bh (global_lock); - - if (net->ksnn_ninterfaces < 2) { - /* Only create additional connections - * if I have > 1 interface */ - write_unlock_bh (global_lock); - return; - } - - LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES); - - for (i = 0; i < npeer_ipaddrs; i++) { - if (newroute != NULL) { - newroute->ksnr_ipaddr = peer_ipaddrs[i]; - } else { - write_unlock_bh (global_lock); - - newroute = ksocknal_create_route(peer_ipaddrs[i], port); - if (newroute == NULL) - return; - - write_lock_bh (global_lock); - } - - if (peer->ksnp_closing) { - /* peer got closed under me */ - break; - } - - /* Already got a route? */ - route = NULL; - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) - break; - - route = NULL; - } - if (route != NULL) - continue; - - best_iface = NULL; - best_nroutes = 0; - best_netmatch = 0; - - LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES); - - /* Select interface to connect from */ - for (j = 0; j < net->ksnn_ninterfaces; j++) { - iface = &net->ksnn_interfaces[j]; - - /* Using this interface already? */ - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr == iface->ksni_ipaddr) - break; - - route = NULL; - } - if (route != NULL) - continue; - - this_netmatch = (((iface->ksni_ipaddr ^ - newroute->ksnr_ipaddr) & - iface->ksni_netmask) == 0) ? 
1 : 0; - - if (!(best_iface == NULL || - best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && - best_nroutes > iface->ksni_nroutes))) - continue; - - best_iface = iface; - best_netmatch = this_netmatch; - best_nroutes = iface->ksni_nroutes; - } - - if (best_iface == NULL) - continue; - - newroute->ksnr_myipaddr = best_iface->ksni_ipaddr; - best_iface->ksni_nroutes++; - - ksocknal_add_route_locked(peer, newroute); - newroute = NULL; - } - - write_unlock_bh (global_lock); - if (newroute != NULL) - ksocknal_route_decref(newroute); -} - -int -ksocknal_accept (lnet_ni_t *ni, cfs_socket_t *sock) -{ - ksock_connreq_t *cr; - int rc; - __u32 peer_ip; - int peer_port; - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT (rc == 0); /* we succeeded before */ - - LIBCFS_ALLOC(cr, sizeof(*cr)); - if (cr == NULL) { - LCONSOLE_ERROR("Dropping connection request from " - "%u.%u.%u.%u: memory exhausted\n", - HIPQUAD(peer_ip)); - return -ENOMEM; - } - - lnet_ni_addref(ni); - cr->ksncr_ni = ni; - cr->ksncr_sock = sock; - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - - list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs); - cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - return 0; -} - -int -ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, - cfs_socket_t *sock, int type) -{ - rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock; - CFS_LIST_HEAD (zombies); - lnet_process_id_t peerid; - struct list_head *tmp; - __u64 incarnation; - ksock_conn_t *conn; - ksock_conn_t *conn2; - ksock_peer_t *peer = NULL; - ksock_peer_t *peer2; - ksock_sched_t *sched; - ksock_hello_msg_t *hello; - unsigned int irq; - ksock_tx_t *tx; - int rc; - int active; - char *warn = NULL; - - active = (route != NULL); - - LASSERT (active == (type != SOCKLND_CONN_NONE)); - LASSERT (route == NULL || route->ksnr_proto != NULL); - - irq = ksocknal_lib_sock_irq (sock); - - LIBCFS_ALLOC(conn, 
sizeof(*conn)); - if (conn == NULL) { - rc = -ENOMEM; - goto failed_0; - } - - memset (conn, 0, sizeof (*conn)); - conn->ksnc_peer = NULL; - conn->ksnc_route = NULL; - conn->ksnc_sock = sock; - atomic_set (&conn->ksnc_sock_refcount, 1); /* 1 ref for conn */ - conn->ksnc_type = type; - ksocknal_lib_save_callback(sock, conn); - atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */ - - conn->ksnc_zc_capable = ksocknal_lib_zc_capable(sock); - - conn->ksnc_rx_ready = 0; - conn->ksnc_rx_scheduled = 0; - - CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue); - conn->ksnc_tx_ready = 0; - conn->ksnc_tx_scheduled = 0; - conn->ksnc_tx_mono = NULL; - atomic_set (&conn->ksnc_tx_nob, 0); - - LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - if (hello == NULL) { - rc = -ENOMEM; - goto failed_1; - } - - /* stash conn's local and remote addrs */ - rc = ksocknal_lib_get_conn_addrs (conn); - if (rc != 0) - goto failed_1; - - /* Find out/confirm peer's NID and connection type and get the - * vector of interfaces she's willing to let me connect to. 
- * Passive connections use the listener timeout since the peer sends - * eagerly */ - - if (active) { - LASSERT(ni == route->ksnr_peer->ksnp_ni); - - /* Active connection sends HELLO eagerly */ - hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips); - peerid = route->ksnr_peer->ksnp_id; - conn->ksnc_proto = route->ksnr_proto; - - rc = ksocknal_send_hello (ni, conn, peerid.nid, hello); - if (rc != 0) - goto failed_1; - } else { - peerid.nid = LNET_NID_ANY; - peerid.pid = LNET_PID_ANY; - - /* Passive, get protocol from peer */ - conn->ksnc_proto = NULL; - } - - rc = ksocknal_recv_hello (ni, conn, hello, &peerid, &incarnation); - if (rc < 0) { - if (rc == -EALREADY) { - /* only active connection loses conn race */ - LASSERT (active); - - CDEBUG(D_NET, "Lost connection race with %s\n", - libcfs_id2str(peerid)); - /* Not an actual failure: return +ve RC so active - * connector can back off */ - rc = EALREADY; - } - goto failed_1; - } - - if (active && route->ksnr_proto != conn->ksnc_proto) { - /* Active connecting, and different protocol is returned */ - CDEBUG(D_NET, "Connecting by %d.x protocol is rejected," - " compatible version %d.x found.\n", - route->ksnr_proto->pro_version, - conn->ksnc_proto->pro_version); - /* Not an actual failure: return +ve RC so active - * connector can back off */ - rc = EPROTO; - - /* Retry with peer's protocol later */ - route->ksnr_proto = conn->ksnc_proto; - - goto failed_1; - } - - LASSERT (peerid.nid != LNET_NID_ANY); - - if (active) { - peer = route->ksnr_peer; - ksocknal_peer_addref(peer); - - /* additional routes after interface exchange? */ - ksocknal_create_routes(peer, conn->ksnc_port, - hello->kshm_ips, hello->kshm_nips); - - /* setup the socket AFTER I've received hello (it disables - * SO_LINGER). I might call back to the acceptor who may want - * to send a protocol version response and then close the - * socket; this ensures the socket only tears down after the - * response has been sent. 
*/ - rc = ksocknal_lib_setup_sock(sock); - - write_lock_bh (global_lock); - - if (rc != 0) - goto failed_2; - } else { - rc = ksocknal_create_peer(&peer, ni, peerid); - if (rc != 0) - goto failed_1; - - write_lock_bh (global_lock); - - peer2 = ksocknal_find_peer_locked(ni, peerid); - if (peer2 == NULL) { - /* NB this puts an "empty" peer in the peer - * table (which takes my ref) */ - list_add_tail(&peer->ksnp_list, - ksocknal_nid2peerlist(peerid.nid)); - } else { - ksocknal_peer_decref(peer); - peer = peer2; - } - - /* +1 ref for me */ - ksocknal_peer_addref(peer); - peer->ksnp_accepting++; - - /* Am I already connecting to this guy? Resolve in - * favour of higher NID... */ - rc = 0; - if (peerid.nid < ni->ni_nid) { - list_for_each(tmp, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, - ksnr_list); - - if (route->ksnr_ipaddr != conn->ksnc_ipaddr) - continue; - - if (route->ksnr_connecting) { - rc = EALREADY; /* not a failure */ - warn = "connection race"; - } - - break; - } - } - route = NULL; - - write_unlock_bh (global_lock); - - if (rc != 0) { - /* set CONN_NONE makes returned HELLO acknowledge I - * lost a connection race */ - conn->ksnc_type = SOCKLND_CONN_NONE; - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, peerid.nid, hello); - } else { - hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips, - hello->kshm_nips); - rc = ksocknal_send_hello(ni, conn, peerid.nid, hello); - - /* Setup the socket (it disables SO_LINGER). I don't - * do it if I'm sending a negative response to ensure - * the response isn't discarded when I close the socket - * immediately after sending it. 
*/ - if (rc == 0) - rc = ksocknal_lib_setup_sock(sock); - } - - write_lock_bh (global_lock); - peer->ksnp_accepting--; - - if (rc != 0) - goto failed_2; - } - - if (peer->ksnp_closing || - (active && route->ksnr_deleted)) { - /* peer/route got closed under me */ - rc = -ESTALE; - warn = "peer/route removed"; - goto failed_2; - } - - /* Refuse to duplicate an existing connection, unless this is a - * loopback connection */ - if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) { - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr || - conn2->ksnc_myipaddr != conn->ksnc_myipaddr || - conn2->ksnc_type != conn->ksnc_type || - conn2->ksnc_incarnation != incarnation) - continue; - - rc = 0; /* more of a NOOP than a failure */ - warn = "duplicate"; - goto failed_2; - } - } - - /* If the connection created by this route didn't bind to the IP - * address the route connected to, the connection/route matching - * code below probably isn't going to work. */ - if (active && - route->ksnr_ipaddr != conn->ksnc_ipaddr) { - CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(route->ksnr_ipaddr), - HIPQUAD(conn->ksnc_ipaddr)); - } - - /* Search for a route corresponding to the new connection and - * create an association. This allows incoming connections created - * by routes in my peer to match my own route entries so I don't - * continually create duplicate routes. 
*/ - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry(tmp, ksock_route_t, ksnr_list); - - if (route->ksnr_ipaddr != conn->ksnc_ipaddr) - continue; - - ksocknal_associate_route_conn_locked(route, conn); - break; - } - - conn->ksnc_peer = peer; /* conn takes my ref on peer */ - conn->ksnc_incarnation = incarnation; - peer->ksnp_last_alive = cfs_time_current(); - peer->ksnp_error = 0; - - sched = ksocknal_choose_scheduler_locked (irq); - sched->kss_nconns++; - conn->ksnc_scheduler = sched; - - /* Set the deadline for the outgoing HELLO to drain */ - conn->ksnc_tx_bufnob = SOCK_WMEM_QUEUED(sock); - conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with adding to peer's conn list */ - - list_add (&conn->ksnc_list, &peer->ksnp_conns); - ksocknal_conn_addref(conn); - - ksocknal_new_packet(conn, 0); - - /* NB my callbacks block while I hold ksnd_global_lock */ - ksocknal_lib_set_callback(sock, conn); - - /* Take all the packets blocking for a connection. - * NB, it might be nicer to share these blocked packets among any - * other connections that are becoming established. */ - while (!list_empty (&peer->ksnp_tx_queue)) { - tx = list_entry (peer->ksnp_tx_queue.next, - ksock_tx_t, tx_list); - - list_del (&tx->tx_list); - ksocknal_queue_tx_locked (tx, conn); - } - - rc = ksocknal_close_stale_conns_locked(peer, incarnation); - write_unlock_bh (global_lock); - - if (rc != 0) - CDEBUG(D_NET, "Closed %d stale conns to %s ip %d.%d.%d.%d\n", - rc, libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr)); - - ksocknal_lib_bind_irq (irq); - - /* Call the callbacks right now to get things going. 
*/ - if (ksocknal_connsock_addref(conn) == 0) { - ksocknal_read_callback(conn); - ksocknal_write_callback(conn); - ksocknal_connsock_decref(conn); - } - - CDEBUG(D_NET, "New conn %s %u.%u.%u.%u -> %u.%u.%u.%u/%d" - " incarnation:"LPD64" sched[%d]/%d\n", - libcfs_id2str(peerid), HIPQUAD(conn->ksnc_myipaddr), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation, - (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); - - LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - - ksocknal_conn_decref(conn); - return (0); - - failed_2: - if (!peer->ksnp_closing && - list_empty (&peer->ksnp_conns) && - list_empty (&peer->ksnp_routes)) { - list_add(&zombies, &peer->ksnp_tx_queue); - list_del_init(&peer->ksnp_tx_queue); - ksocknal_unlink_peer_locked(peer); - } - - write_unlock_bh (global_lock); - - if (warn != NULL) { - if (rc < 0) - CERROR("Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - else - CDEBUG(D_NET, "Not creating conn %s type %d: %s\n", - libcfs_id2str(peerid), conn->ksnc_type, warn); - } - - ksocknal_txlist_done(ni, &zombies, 1); - ksocknal_peer_decref(peer); - - failed_1: - if (hello != NULL) - LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t, - kshm_ips[LNET_MAX_INTERFACES])); - - LIBCFS_FREE (conn, sizeof(*conn)); - - failed_0: - libcfs_sock_release(sock); - return rc; -} - -void -ksocknal_close_conn_locked (ksock_conn_t *conn, int error) -{ - /* This just does the immmediate housekeeping, and queues the - * connection for the reaper to terminate. 
- * Caller holds ksnd_global_lock exclusively in irq context */ - ksock_peer_t *peer = conn->ksnc_peer; - ksock_route_t *route; - ksock_conn_t *conn2; - struct list_head *tmp; - - LASSERT (peer->ksnp_error == 0); - LASSERT (!conn->ksnc_closing); - conn->ksnc_closing = 1; - - /* ksnd_deathrow_conns takes over peer's ref */ - list_del (&conn->ksnc_list); - - route = conn->ksnc_route; - if (route != NULL) { - /* dissociate conn from route... */ - LASSERT (!route->ksnr_deleted); - LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0); - - conn2 = NULL; - list_for_each(tmp, &peer->ksnp_conns) { - conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn2->ksnc_route == route && - conn2->ksnc_type == conn->ksnc_type) - break; - - conn2 = NULL; - } - if (conn2 == NULL) - route->ksnr_connected &= ~(1 << conn->ksnc_type); - - conn->ksnc_route = NULL; - -#if 0 /* irrelevent with only eager routes */ - list_del (&route->ksnr_list); /* make route least favourite */ - list_add_tail (&route->ksnr_list, &peer->ksnp_routes); -#endif - ksocknal_route_decref(route); /* drop conn's ref on route */ - } - - if (list_empty (&peer->ksnp_conns)) { - /* No more connections to this peer */ - - peer->ksnp_error = error; /* stash last conn close reason */ - - if (list_empty (&peer->ksnp_routes)) { - /* I've just closed last conn belonging to a - * peer with no routes to it */ - ksocknal_unlink_peer_locked (peer); - } - } - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - - list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns); - cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); -} - -void -ksocknal_peer_failed (ksock_peer_t *peer) -{ - time_t last_alive = 0; - int notify = 0; - - /* There has been a connection failure or comms error; but I'll only - * tell LNET I think the peer is dead if it's to another kernel and - * there are no connections or connection attempts in existance. 
*/ - - read_lock (&ksocknal_data.ksnd_global_lock); - - if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 && - list_empty(&peer->ksnp_conns) && - peer->ksnp_accepting == 0 && - ksocknal_find_connecting_route_locked(peer) == NULL) { - notify = 1; - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ksnp_last_alive); - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - - if (notify) - lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0, - last_alive); -} - -void -ksocknal_terminate_conn (ksock_conn_t *conn) -{ - /* This gets called by the reaper (guaranteed thread context) to - * disengage the socket from its callbacks and close it. - * ksnc_refcount will eventually hit zero, and then the reaper will - * destroy it. */ - ksock_peer_t *peer = conn->ksnc_peer; - ksock_sched_t *sched = conn->ksnc_scheduler; - int failed = 0; - struct list_head *tmp; - struct list_head *nxt; - ksock_tx_t *tx; - LIST_HEAD (zlist); - - LASSERT(conn->ksnc_closing); - - /* wake up the scheduler to "send" all remaining packets to /dev/null */ - spin_lock_bh (&sched->kss_lock); - - if (!conn->ksnc_tx_scheduled && - !list_empty(&conn->ksnc_tx_queue)){ - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - /* a closing conn is always ready to tx */ - conn->ksnc_tx_ready = 1; - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - cfs_waitq_signal (&sched->kss_waitq); - } - - spin_unlock_bh (&sched->kss_lock); - - spin_lock(&peer->ksnp_lock); - - list_for_each_safe(tmp, nxt, &peer->ksnp_zc_req_list) { - tx = list_entry(tmp, ksock_tx_t, tx_zc_list); - - if (tx->tx_conn != conn) - continue; - - LASSERT (tx->tx_msg.ksm_zc_req_cookie != 0); - - tx->tx_msg.ksm_zc_req_cookie = 0; - list_del(&tx->tx_zc_list); - list_add(&tx->tx_zc_list, &zlist); - } - - spin_unlock(&peer->ksnp_lock); - - list_for_each_safe(tmp, nxt, &zlist) { - tx = list_entry(tmp, ksock_tx_t, tx_zc_list); - - list_del(&tx->tx_zc_list); - 
ksocknal_tx_decref(tx); - } - - /* serialise with callbacks */ - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - ksocknal_lib_reset_callback(conn->ksnc_sock, conn); - - /* OK, so this conn may not be completely disengaged from its - * scheduler yet, but it _has_ committed to terminate... */ - conn->ksnc_scheduler->kss_nconns--; - - if (peer->ksnp_error != 0) { - /* peer's last conn closed in error */ - LASSERT (list_empty (&peer->ksnp_conns)); - failed = 1; - peer->ksnp_error = 0; /* avoid multiple notifications */ - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - if (failed) - ksocknal_peer_failed(peer); - - /* The socket is closed on the final put; either here, or in - * ksocknal_{send,recv}msg(). Since we set up the linger2 option - * when the connection was established, this will close the socket - * immediately, aborting anything buffered in it. Any hung - * zero-copy transmits will therefore complete in finite time. */ - ksocknal_connsock_decref(conn); -} - -void -ksocknal_queue_zombie_conn (ksock_conn_t *conn) -{ - /* Queue the conn for the reaper to destroy */ - - LASSERT (atomic_read(&conn->ksnc_conn_refcount) == 0); - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - - list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); - cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); -} - -void -ksocknal_destroy_conn (ksock_conn_t *conn) -{ - /* Final coup-de-grace of the reaper */ - CDEBUG (D_NET, "connection %p\n", conn); - - LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0); - LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0); - LASSERT (conn->ksnc_sock == NULL); - LASSERT (conn->ksnc_route == NULL); - LASSERT (!conn->ksnc_tx_scheduled); - LASSERT (!conn->ksnc_rx_scheduled); - LASSERT (list_empty(&conn->ksnc_tx_queue)); - - /* complete current receive if any */ - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_LNET_PAYLOAD: - CERROR("Completing partial receive from 
%s" - ", ip %d.%d.%d.%d:%d, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - lnet_finalize (conn->ksnc_peer->ksnp_ni, - conn->ksnc_cookie, -EIO); - break; - case SOCKNAL_RX_LNET_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of lnet header from %s" - ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of ksock message from %s" - ", ip %d.%d.%d.%d:%d, with error, protocol: %d.x.\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, - conn->ksnc_proto->pro_version); - break; - case SOCKNAL_RX_SLOP: - if (conn->ksnc_rx_started) - CERROR("Incomplete receive of slops from %s" - ", ip %d.%d.%d.%d:%d, with error\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - break; - default: - LBUG (); - break; - } - - ksocknal_peer_decref(conn->ksnc_peer); - - LIBCFS_FREE (conn, sizeof (*conn)); -} - -int -ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why) -{ - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - - if (ipaddr == 0 || - conn->ksnc_ipaddr == ipaddr) { - count++; - ksocknal_close_conn_locked (conn, why); - } - } - - return (count); -} - -int -ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) -{ - ksock_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) { - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn %s 
ip:%08x/%d " - "incarnation:"LPD64"("LPD64")\n", - libcfs_id2str(peer->ksnp_id), - conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_incarnation, incarnation); - - count++; - ksocknal_close_conn_locked (conn, -ESTALE); - } - - return (count); -} - -int -ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) -{ - ksock_peer_t *peer = conn->ksnc_peer; - __u32 ipaddr = conn->ksnc_ipaddr; - int count; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - count = ksocknal_close_peer_conns_locked (peer, ipaddr, why); - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - return (count); -} - -int -ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr) -{ - ksock_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - int count = 0; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - if (id.nid != LNET_NID_ANY) - lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers; - else { - lo = 0; - hi = ksocknal_data.ksnd_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) { - - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - - if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid))) - continue; - - count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0); - } - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - /* wildcards always succeed */ - if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0) - return (0); - - return (count == 0 ? -ENOENT : 0); -} - -void -ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive) -{ - /* The router is telling me she's been notified of a change in - * gateway state.... */ - lnet_process_id_t id = {.nid = gw_nid, .pid = LNET_PID_ANY}; - - CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid), - alive ? "up" : "down"); - - if (!alive) { - /* If the gateway crashed, close all open connections... 
*/ - ksocknal_close_matching_conns (id, 0); - return; - } - - /* ...otherwise do nothing. We can only establish new connections - * if we have autroutes, and these connect on demand. */ -} - -void -ksocknal_push_peer (ksock_peer_t *peer) -{ - int index; - int i; - struct list_head *tmp; - ksock_conn_t *conn; - - for (index = 0; ; index++) { - read_lock (&ksocknal_data.ksnd_global_lock); - - i = 0; - conn = NULL; - - list_for_each (tmp, &peer->ksnp_conns) { - if (i++ == index) { - conn = list_entry (tmp, ksock_conn_t, ksnc_list); - ksocknal_conn_addref(conn); - break; - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - - if (conn == NULL) - break; - - ksocknal_lib_push_conn (conn); - ksocknal_conn_decref(conn); - } -} - -int -ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id) -{ - ksock_peer_t *peer; - struct list_head *tmp; - int index; - int i; - int j; - int rc = -ENOENT; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - for (j = 0; ; j++) { - read_lock (&ksocknal_data.ksnd_global_lock); - - index = 0; - peer = NULL; - - list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(tmp, ksock_peer_t, - ksnp_list); - - if (!((id.nid == LNET_NID_ANY || - id.nid == peer->ksnp_id.nid) && - (id.pid == LNET_PID_ANY || - id.pid == peer->ksnp_id.pid))) { - peer = NULL; - continue; - } - - if (index++ == j) { - ksocknal_peer_addref(peer); - break; - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - - if (peer != NULL) { - rc = 0; - ksocknal_push_peer (peer); - ksocknal_peer_decref(peer); - } - } - - } - - return (rc); -} - -int -ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask) -{ - ksock_net_t *net = ni->ni_data; - ksock_interface_t *iface; - int rc; - int i; - int j; - struct list_head *ptmp; - ksock_peer_t *peer; - struct list_head *rtmp; - ksock_route_t *route; - - if (ipaddress == 0 || - netmask == 0) - return (-EINVAL); - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - iface = 
ksocknal_ip2iface(ni, ipaddress); - if (iface != NULL) { - /* silently ignore dups */ - rc = 0; - } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) { - rc = -ENOSPC; - } else { - iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++]; - - iface->ksni_ipaddr = ipaddress; - iface->ksni_netmask = netmask; - iface->ksni_nroutes = 0; - iface->ksni_npeers = 0; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) { - peer = list_entry(ptmp, ksock_peer_t, ksnp_list); - - for (j = 0; i < peer->ksnp_n_passive_ips; j++) - if (peer->ksnp_passive_ips[j] == ipaddress) - iface->ksni_npeers++; - - list_for_each(rtmp, &peer->ksnp_routes) { - route = list_entry(rtmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr == ipaddress) - iface->ksni_nroutes++; - } - } - } - - rc = 0; - /* NB only new connections will pay attention to the new interface! */ - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - return (rc); -} - -void -ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr) -{ - struct list_head *tmp; - struct list_head *nxt; - ksock_route_t *route; - ksock_conn_t *conn; - int i; - int j; - - for (i = 0; i < peer->ksnp_n_passive_ips; i++) - if (peer->ksnp_passive_ips[i] == ipaddr) { - for (j = i+1; j < peer->ksnp_n_passive_ips; j++) - peer->ksnp_passive_ips[j-1] = - peer->ksnp_passive_ips[j]; - peer->ksnp_n_passive_ips--; - break; - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); - - if (route->ksnr_myipaddr != ipaddr) - continue; - - if (route->ksnr_share_count != 0) { - /* Manually created; keep, but unbind */ - route->ksnr_myipaddr = 0; - } else { - ksocknal_del_route_locked(route); - } - } - - list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_myipaddr == ipaddr) - ksocknal_close_conn_locked (conn, 0); - } -} - -int 
-ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress) -{ - ksock_net_t *net = ni->ni_data; - int rc = -ENOENT; - struct list_head *tmp; - struct list_head *nxt; - ksock_peer_t *peer; - __u32 this_ip; - int i; - int j; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - this_ip = net->ksnn_interfaces[i].ksni_ipaddr; - - if (!(ipaddress == 0 || - ipaddress == this_ip)) - continue; - - rc = 0; - - for (j = i+1; j < net->ksnn_ninterfaces; j++) - net->ksnn_interfaces[j-1] = - net->ksnn_interfaces[j]; - - net->ksnn_ninterfaces--; - - for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { - list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) { - peer = list_entry(tmp, ksock_peer_t, ksnp_list); - - if (peer->ksnp_ni != ni) - continue; - - ksocknal_peer_del_interface_locked(peer, this_ip); - } - } - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - return (rc); -} - -int -ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc; - - switch(cmd) { - case IOC_LIBCFS_GET_INTERFACE: { - ksock_net_t *net = ni->ni_data; - ksock_interface_t *iface; - - read_lock (&ksocknal_data.ksnd_global_lock); - - if (data->ioc_count < 0 || - data->ioc_count >= net->ksnn_ninterfaces) { - rc = -ENOENT; - } else { - rc = 0; - iface = &net->ksnn_interfaces[data->ioc_count]; - - data->ioc_u32[0] = iface->ksni_ipaddr; - data->ioc_u32[1] = iface->ksni_netmask; - data->ioc_u32[2] = iface->ksni_npeers; - data->ioc_u32[3] = iface->ksni_nroutes; - } - - read_unlock (&ksocknal_data.ksnd_global_lock); - return rc; - } - - case IOC_LIBCFS_ADD_INTERFACE: - return ksocknal_add_interface(ni, - data->ioc_u32[0], /* IP address */ - data->ioc_u32[1]); /* net mask */ - - case IOC_LIBCFS_DEL_INTERFACE: - return ksocknal_del_interface(ni, - data->ioc_u32[0]); /* IP address */ - - case IOC_LIBCFS_GET_PEER: { - lnet_process_id_t id = {0,}; - __u32 myip = 0; - __u32 ip = 0; - int port = 0; - 
int conn_count = 0; - int share_count = 0; - - rc = ksocknal_get_peer_info(ni, data->ioc_count, - &id, &myip, &ip, &port, - &conn_count, &share_count); - if (rc != 0) - return rc; - - data->ioc_nid = id.nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = port; - data->ioc_u32[2] = myip; - data->ioc_u32[3] = conn_count; - data->ioc_u32[4] = id.pid; - return 0; - } - - case IOC_LIBCFS_ADD_PEER: { - lnet_process_id_t id = {.nid = data->ioc_nid, - .pid = LUSTRE_SRV_LNET_PID}; - return ksocknal_add_peer (ni, id, - data->ioc_u32[0], /* IP */ - data->ioc_u32[1]); /* port */ - } - case IOC_LIBCFS_DEL_PEER: { - lnet_process_id_t id = {.nid = data->ioc_nid, - .pid = LNET_PID_ANY}; - return ksocknal_del_peer (ni, id, - data->ioc_u32[0]); /* IP */ - } - case IOC_LIBCFS_GET_CONN: { - int txmem; - int rxmem; - int nagle; - ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count); - - if (conn == NULL) - return -ENOENT; - - ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle); - - data->ioc_count = txmem; - data->ioc_nid = conn->ksnc_peer->ksnp_id.nid; - data->ioc_flags = nagle; - data->ioc_u32[0] = conn->ksnc_ipaddr; - data->ioc_u32[1] = conn->ksnc_port; - data->ioc_u32[2] = conn->ksnc_myipaddr; - data->ioc_u32[3] = conn->ksnc_type; - data->ioc_u32[4] = conn->ksnc_scheduler - - ksocknal_data.ksnd_schedulers; - data->ioc_u32[5] = rxmem; - data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid; - ksocknal_conn_decref(conn); - return 0; - } - - case IOC_LIBCFS_CLOSE_CONNECTION: { - lnet_process_id_t id = {.nid = data->ioc_nid, - .pid = LNET_PID_ANY}; - - return ksocknal_close_matching_conns (id, - data->ioc_u32[0]); - } - case IOC_LIBCFS_REGISTER_MYNID: - /* Ignore if this is a noop */ - if (data->ioc_nid == ni->ni_nid) - return 0; - - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - - case IOC_LIBCFS_PUSH_CONNECTION: { - lnet_process_id_t id = {.nid 
= data->ioc_nid, - .pid = LNET_PID_ANY}; - - return ksocknal_push(ni, id); - } - default: - return -EINVAL; - } - /* not reached */ -} - -void -ksocknal_free_buffers (void) -{ - LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0); - - if (ksocknal_data.ksnd_schedulers != NULL) - LIBCFS_FREE (ksocknal_data.ksnd_schedulers, - sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers); - - LIBCFS_FREE (ksocknal_data.ksnd_peers, - sizeof (struct list_head) * - ksocknal_data.ksnd_peer_hash_size); - - spin_lock(&ksocknal_data.ksnd_tx_lock); - - if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - struct list_head zlist; - ksock_tx_t *tx; - - list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs); - list_del_init(&ksocknal_data.ksnd_idle_noop_txs); - spin_unlock(&ksocknal_data.ksnd_tx_lock); - - while(!list_empty(&zlist)) { - tx = list_entry(zlist.next, ksock_tx_t, tx_list); - list_del(&tx->tx_list); - LIBCFS_FREE(tx, tx->tx_desc_size); - } - } else { - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } -} - -void -ksocknal_base_shutdown (void) -{ - ksock_sched_t *sched; - int i; - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - LASSERT (ksocknal_data.ksnd_nnets == 0); - - switch (ksocknal_data.ksnd_init) { - default: - LASSERT (0); - - case SOCKNAL_INIT_ALL: - case SOCKNAL_INIT_DATA: - LASSERT (ksocknal_data.ksnd_peers != NULL); - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - LASSERT (list_empty (&ksocknal_data.ksnd_peers[i])); - } - LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns)); - LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns)); - LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs)); - LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes)); - - if (ksocknal_data.ksnd_schedulers != NULL) - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - ksock_sched_t *kss = - &ksocknal_data.ksnd_schedulers[i]; - - LASSERT (list_empty (&kss->kss_tx_conns)); - LASSERT (list_empty (&kss->kss_rx_conns)); 
- LASSERT (list_empty (&kss->kss_zombie_noop_txs)); - LASSERT (kss->kss_nconns == 0); - } - - /* flag threads to terminate; wake and wait for them to die */ - ksocknal_data.ksnd_shuttingdown = 1; - cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq); - cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq); - - if (ksocknal_data.ksnd_schedulers != NULL) - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - sched = &ksocknal_data.ksnd_schedulers[i]; - cfs_waitq_broadcast(&sched->kss_waitq); - } - - i = 4; - read_lock (&ksocknal_data.ksnd_global_lock); - while (ksocknal_data.ksnd_nthreads != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "waiting for %d threads to terminate\n", - ksocknal_data.ksnd_nthreads); - read_unlock (&ksocknal_data.ksnd_global_lock); - cfs_pause(cfs_time_seconds(1)); - read_lock (&ksocknal_data.ksnd_global_lock); - } - read_unlock (&ksocknal_data.ksnd_global_lock); - - ksocknal_free_buffers(); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING; - break; - } - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - PORTAL_MODULE_UNUSE; -} - - -__u64 -ksocknal_new_incarnation (void) -{ - struct timeval tv; - - /* The incarnation number is the time this module loaded and it - * identifies this particular instance of the socknal. 
Hopefully - * we won't be able to reboot more frequently than 1MHz for the - * forseeable future :) */ - - do_gettimeofday(&tv); - - return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; -} - -int -ksocknal_base_startup (void) -{ - int rc; - int i; - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - LASSERT (ksocknal_data.ksnd_nnets == 0); - - memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - - ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC (ksocknal_data.ksnd_peers, - sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); - if (ksocknal_data.ksnd_peers == NULL) - return -ENOMEM; - - for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); - - rwlock_init(&ksocknal_data.ksnd_global_lock); - - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); - cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq); - - spin_lock_init (&ksocknal_data.ksnd_connd_lock); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes); - cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq); - - spin_lock_init (&ksocknal_data.ksnd_tx_lock); - CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_idle_noop_txs); - - /* NB memset above zeros whole of ksocknal_data, including - * ksocknal_data.ksnd_irqinfo[all].ksni_valid */ - - /* flag lists/ptrs/locks initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - PORTAL_MODULE_USE; - - ksocknal_data.ksnd_nschedulers = ksocknal_nsched(); - LIBCFS_ALLOC(ksocknal_data.ksnd_schedulers, - sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers); - if (ksocknal_data.ksnd_schedulers == NULL) - goto failed; - - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i]; - - 
spin_lock_init (&kss->kss_lock); - CFS_INIT_LIST_HEAD (&kss->kss_rx_conns); - CFS_INIT_LIST_HEAD (&kss->kss_tx_conns); - CFS_INIT_LIST_HEAD (&kss->kss_zombie_noop_txs); - cfs_waitq_init (&kss->kss_waitq); - } - - for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { - rc = ksocknal_thread_start (ksocknal_scheduler, - &ksocknal_data.ksnd_schedulers[i]); - if (rc != 0) { - CERROR("Can't spawn socknal scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - /* must have at least 2 connds to remain responsive to accepts while - * connecting */ - if (*ksocknal_tunables.ksnd_nconnds < 2) - *ksocknal_tunables.ksnd_nconnds = 2; - - for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) { - rc = ksocknal_thread_start (ksocknal_connd, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn socknal connd: %d\n", rc); - goto failed; - } - } - - rc = ksocknal_thread_start (ksocknal_reaper, NULL); - if (rc != 0) { - CERROR ("Can't spawn socknal reaper: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; - - return 0; - - failed: - ksocknal_base_shutdown(); - return -ENETDOWN; -} - -void -ksocknal_shutdown (lnet_ni_t *ni) -{ - ksock_net_t *net = ni->ni_data; - int i; - lnet_process_id_t anyid = {.nid = LNET_NID_ANY, - .pid = LNET_PID_ANY}; - - LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL); - LASSERT(ksocknal_data.ksnd_nnets > 0); - - spin_lock_bh (&net->ksnn_lock); - net->ksnn_shutdown = 1; /* prevent new peers */ - spin_unlock_bh (&net->ksnn_lock); - - /* Delete all peers */ - ksocknal_del_peer(ni, anyid, 0); - - /* Wait for all peer state to clean up */ - i = 2; - spin_lock_bh (&net->ksnn_lock); - while (net->ksnn_npeers != 0) { - spin_unlock_bh (&net->ksnn_lock); - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? 
*/ - "waiting for %d peers to disconnect\n", - net->ksnn_npeers); - cfs_pause(cfs_time_seconds(1)); - - spin_lock_bh (&net->ksnn_lock); - } - spin_unlock_bh (&net->ksnn_lock); - - for (i = 0; i < net->ksnn_ninterfaces; i++) { - LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0); - LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0); - } - - LIBCFS_FREE(net, sizeof(*net)); - - ksocknal_data.ksnd_nnets--; - if (ksocknal_data.ksnd_nnets == 0) - ksocknal_base_shutdown(); -} - -int -ksocknal_enumerate_interfaces(ksock_net_t *net) -{ - char **names; - int i; - int j; - int rc; - int n; - - n = libcfs_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Can't enumerate interfaces: %d\n", n); - return n; - } - - for (i = j = 0; i < n; i++) { - int up; - __u32 ip; - __u32 mask; - - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = libcfs_ipif_query(names[i], &up, &ip, &mask); - if (rc != 0) { - CWARN("Can't get interface %s info: %d\n", - names[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s (down)\n", - names[i]); - continue; - } - - if (j == LNET_MAX_INTERFACES) { - CWARN("Ignoring interface %s (too many interfaces)\n", - names[i]); - continue; - } - - net->ksnn_interfaces[j].ksni_ipaddr = ip; - net->ksnn_interfaces[j].ksni_netmask = mask; - j++; - } - - libcfs_ipif_free_enumeration(names, n); - - if (j == 0) - CERROR("Can't find any usable interfaces\n"); - - return j; -} - -int -ksocknal_startup (lnet_ni_t *ni) -{ - ksock_net_t *net; - int rc; - int i; - - LASSERT (ni->ni_lnd == &the_ksocklnd); - - if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) { - rc = ksocknal_base_startup(); - if (rc != 0) - return rc; - } - - LIBCFS_ALLOC(net, sizeof(*net)); - if (net == NULL) - goto fail_0; - - memset(net, 0, sizeof(*net)); - spin_lock_init(&net->ksnn_lock); - net->ksnn_incarnation = ksocknal_new_incarnation(); - ni->ni_data = net; - ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits; - ni->ni_peertxcredits = 
*ksocknal_tunables.ksnd_peercredits; - - if (ni->ni_interfaces[0] == NULL) { - rc = ksocknal_enumerate_interfaces(net); - if (rc <= 0) - goto fail_1; - - net->ksnn_ninterfaces = 1; - } else { - for (i = 0; i < LNET_MAX_INTERFACES; i++) { - int up; - - if (ni->ni_interfaces[i] == NULL) - break; - - rc = libcfs_ipif_query( - ni->ni_interfaces[i], &up, - &net->ksnn_interfaces[i].ksni_ipaddr, - &net->ksnn_interfaces[i].ksni_netmask); - - if (rc != 0) { - CERROR("Can't get interface %s info: %d\n", - ni->ni_interfaces[i], rc); - goto fail_1; - } - - if (!up) { - CERROR("Interface %s is down\n", - ni->ni_interfaces[i]); - goto fail_1; - } - } - net->ksnn_ninterfaces = i; - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), - net->ksnn_interfaces[0].ksni_ipaddr); - - ksocknal_data.ksnd_nnets++; - - return 0; - - fail_1: - LIBCFS_FREE(net, sizeof(*net)); - fail_0: - if (ksocknal_data.ksnd_nnets == 0) - ksocknal_base_shutdown(); - - return -ENETDOWN; -} - - -void __exit -ksocknal_module_fini (void) -{ - lnet_unregister_lnd(&the_ksocklnd); - ksocknal_lib_tunables_fini(); -} - -int __init -ksocknal_module_init (void) -{ - int rc; - - /* check ksnr_connected/connecting field large enough */ - CLASSERT(SOCKLND_CONN_NTYPES <= 4); - - rc = ksocknal_lib_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_ksocklnd); - - return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel TCP Socket LND v2.0.0"); -MODULE_LICENSE("GPL"); - -cfs_module(ksocknal, "2.0.0", ksocknal_module_init, ksocknal_module_fini); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h deleted file mode 100644 index efc35d3426d357ba3542c5f4d68f2d7da04a0f93..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd.h +++ /dev/null @@ -1,543 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_PORTAL_ALLOC -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#define DEBUG_SUBSYSTEM S_LND - -#if defined(__linux__) -#include "socklnd_lib-linux.h" -#elif defined(__APPLE__) -#include "socklnd_lib-darwin.h" -#elif defined(__WINNT__) -#include "socklnd_lib-winnt.h" -#else -#error Unsupported Operating System -#endif - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> -#include <lnet/socklnd.h> - -#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define SOCKNAL_ENOMEM_RETRY CFS_TICK /* jiffies between retries */ - -#define SOCKNAL_ROUND_ROBIN 0 /* round robin / load balance */ - -#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */ -#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */ - -/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled). - * no risk if we're not running on a CONFIG_HIGHMEM platform. */ -#ifdef CONFIG_HIGHMEM -# define SOCKNAL_RISK_KMAP_DEADLOCK 0 -#else -# define SOCKNAL_RISK_KMAP_DEADLOCK 1 -#endif - -typedef struct /* per scheduler state */ -{ - spinlock_t kss_lock; /* serialise */ - struct list_head kss_rx_conns; /* conn waiting to be read */ - struct list_head kss_tx_conns; /* conn waiting to be written */ - struct list_head kss_zombie_noop_txs; /* zombie noop tx list */ - cfs_waitq_t kss_waitq; /* where scheduler sleeps */ - int kss_nconns; /* # connections assigned to this scheduler */ -} ksock_sched_t; - -typedef struct -{ - unsigned int ksni_valid:1; /* been set yet? */ - unsigned int ksni_bound:1; /* bound to a cpu yet? 
*/ - unsigned int ksni_sched:6; /* which scheduler (assumes < 64) */ -} ksock_irqinfo_t; - -typedef struct /* in-use interface */ -{ - __u32 ksni_ipaddr; /* interface's IP address */ - __u32 ksni_netmask; /* interface's network mask */ - int ksni_nroutes; /* # routes using (active) */ - int ksni_npeers; /* # peers using (passive) */ - char ksni_name[16]; /* interface name */ -} ksock_interface_t; - -typedef struct -{ - int *ksnd_timeout; /* "stuck" socket timeout (seconds) */ - int *ksnd_nconnds; /* # connection daemons */ - int *ksnd_min_reconnectms; /* first connection retry after (ms)... */ - int *ksnd_max_reconnectms; /* ...exponentially increasing to this */ - int *ksnd_eager_ack; /* make TCP ack eagerly? */ - int *ksnd_typed_conns; /* drive sockets by type? */ - int *ksnd_min_bulk; /* smallest "large" message */ - int *ksnd_tx_buffer_size; /* socket tx buffer size */ - int *ksnd_rx_buffer_size; /* socket rx buffer size */ - int *ksnd_nagle; /* enable NAGLE? */ - int *ksnd_keepalive_idle; /* # idle secs before 1st probe */ - int *ksnd_keepalive_count; /* # probes */ - int *ksnd_keepalive_intvl; /* time between probes */ - int *ksnd_credits; /* # concurrent sends */ - int *ksnd_peercredits; /* # concurrent sends to 1 peer */ - int *ksnd_enable_csum; /* enable check sum */ - int *ksnd_inject_csum_error; /* set non-zero to inject checksum error */ - unsigned int *ksnd_zc_min_frag; /* minimum zero copy frag size */ -#ifdef CPU_AFFINITY - int *ksnd_irq_affinity; /* enable IRQ affinity? */ -#endif -#ifdef SOCKNAL_BACKOFF - int *ksnd_backoff_init; /* initial TCP backoff */ - int *ksnd_backoff_max; /* maximum TCP backoff */ -#endif -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - cfs_sysctl_table_header_t *ksnd_sysctl; /* sysctl interface */ -#endif -} ksock_tunables_t; - -typedef struct -{ - __u64 ksnn_incarnation; /* my epoch */ - spinlock_t ksnn_lock; /* serialise */ - int ksnn_npeers; /* # peers */ - int ksnn_shutdown; /* shutting down? 
*/ - int ksnn_ninterfaces; /* IP interfaces */ - ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES]; -} ksock_net_t; - -typedef struct -{ - int ksnd_init; /* initialisation state */ - int ksnd_nnets; /* # networks set up */ - - rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */ - struct list_head *ksnd_peers; /* hash table of all my known peers */ - int ksnd_peer_hash_size; /* size of ksnd_peers */ - - int ksnd_nthreads; /* # live threads */ - int ksnd_shuttingdown; /* tell threads to exit */ - int ksnd_nschedulers; /* # schedulers */ - ksock_sched_t *ksnd_schedulers; /* their state */ - - atomic_t ksnd_nactive_txs; /* #active txs */ - - struct list_head ksnd_deathrow_conns; /* conns to close: reaper_lock*/ - struct list_head ksnd_zombie_conns; /* conns to free: reaper_lock */ - struct list_head ksnd_enomem_conns; /* conns to retry: reaper_lock*/ - cfs_waitq_t ksnd_reaper_waitq; /* reaper sleeps here */ - cfs_time_t ksnd_reaper_waketime; /* when reaper will wake */ - spinlock_t ksnd_reaper_lock; /* serialise */ - - int ksnd_enomem_tx; /* test ENOMEM sender */ - int ksnd_stall_tx; /* test sluggish sender */ - int ksnd_stall_rx; /* test sluggish receiver */ - - struct list_head ksnd_connd_connreqs; /* incoming connection requests */ - struct list_head ksnd_connd_routes; /* routes waiting to be connected */ - cfs_waitq_t ksnd_connd_waitq; /* connds sleep here */ - int ksnd_connd_connecting;/* # connds connecting */ - spinlock_t ksnd_connd_lock; /* serialise */ - - struct list_head ksnd_idle_noop_txs; /* list head for freed noop tx */ - spinlock_t ksnd_tx_lock; /* serialise, NOT safe in g_lock */ - - ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */ - -} ksock_nal_data_t; - -#define SOCKNAL_INIT_NOTHING 0 -#define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_ALL 2 - -/* A packet just assembled for transmission is represented by 1 or more - * struct iovec fragments (the first frag contains the portals header), - * followed by 0 or more lnet_kiov_t 
fragments. - * - * On the receive side, initially 1 struct iovec fragment is posted for - * receive (the header). Once the header has been received, the payload is - * received into either struct iovec or lnet_kiov_t fragments, depending on - * what the header matched or whether the message needs forwarding. */ - -struct ksock_conn; /* forward ref */ -struct ksock_peer; /* forward ref */ -struct ksock_route; /* forward ref */ -struct ksock_protocol; /* forward ref */ - -typedef struct /* transmit packet */ -{ - struct list_head tx_list; /* queue on conn for transmission etc */ - struct list_head tx_zc_list; /* queue on peer for ZC request */ - atomic_t tx_refcount; /* tx reference count */ - int tx_nob; /* # packet bytes */ - int tx_resid; /* residual bytes */ - int tx_niov; /* # packet iovec frags */ - struct iovec *tx_iov; /* packet iovec frags */ - int tx_nkiov; /* # packet page frags */ - unsigned int tx_checked_zc; /* Have I checked if I should ZC? */ - lnet_kiov_t *tx_kiov; /* packet page frags */ - struct ksock_conn *tx_conn; /* owning conn */ - lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */ - ksock_msg_t tx_msg; /* socklnd message buffer */ - int tx_desc_size; /* size of this descriptor */ - union { - struct { - struct iovec iov; /* virt hdr */ - lnet_kiov_t kiov[0]; /* paged payload */ - } paged; - struct { - struct iovec iov[1]; /* virt hdr + payload */ - } virt; - } tx_frags; -} ksock_tx_t; - -#define KSOCK_NOOP_TX_SIZE offsetof(ksock_tx_t, tx_frags.paged.kiov[0]) - -/* network zero copy callback descriptor embedded in ksock_tx_t */ - -/* space for the rx frag descriptors; we either read a single contiguous - * header, or up to LNET_MAX_IOV frags of payload of either type. 
*/ -typedef union { - struct iovec iov[LNET_MAX_IOV]; - lnet_kiov_t kiov[LNET_MAX_IOV]; -} ksock_rxiovspace_t; - -#define SOCKNAL_RX_KSM_HEADER 1 /* reading ksock message header */ -#define SOCKNAL_RX_LNET_HEADER 2 /* reading lnet message header */ -#define SOCKNAL_RX_PARSE 3 /* Calling lnet_parse() */ -#define SOCKNAL_RX_PARSE_WAIT 4 /* waiting to be told to read the body */ -#define SOCKNAL_RX_LNET_PAYLOAD 5 /* reading lnet payload (to deliver here) */ -#define SOCKNAL_RX_SLOP 6 /* skipping body */ - -typedef struct ksock_conn -{ - struct ksock_peer *ksnc_peer; /* owning peer */ - struct ksock_route *ksnc_route; /* owning route */ - struct list_head ksnc_list; /* stash on peer's conn list */ - cfs_socket_t *ksnc_sock; /* actual socket */ - void *ksnc_saved_data_ready; /* socket's original data_ready() callback */ - void *ksnc_saved_write_space; /* socket's original write_space() callback */ - atomic_t ksnc_conn_refcount; /* conn refcount */ - atomic_t ksnc_sock_refcount; /* sock refcount */ - ksock_sched_t *ksnc_scheduler; /* who schedules this connection */ - __u32 ksnc_myipaddr; /* my IP */ - __u32 ksnc_ipaddr; /* peer's IP */ - int ksnc_port; /* peer's port */ - int ksnc_type:3; /* type of connection, should be signed value */ - int ksnc_closing:1; /* being shut down */ - int ksnc_flip:1; /* flip or not, only for V2.x */ - int ksnc_zc_capable:1; /* enable to ZC */ - __u64 ksnc_incarnation; /* peer's incarnation */ - - /* reader */ - struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ - cfs_time_t ksnc_rx_deadline; /* when (in jiffies) receive times out */ - __u8 ksnc_rx_started; /* started receiving a message */ - __u8 ksnc_rx_ready; /* data ready to read */ - __u8 ksnc_rx_scheduled; /* being progressed */ - __u8 ksnc_rx_state; /* what is being read */ - int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - int ksnc_rx_nob_wanted; /* bytes actually wanted */ - int ksnc_rx_niov; /* # iovec frags */ - struct iovec 
*ksnc_rx_iov; /* the iovec frags */ - int ksnc_rx_nkiov; /* # page frags */ - lnet_kiov_t *ksnc_rx_kiov; /* the page frags */ - ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */ - __u32 ksnc_rx_csum; /* partial checksum for incoming data */ - void *ksnc_cookie; /* rx lnet_finalize passthru arg */ - ksock_msg_t ksnc_msg; /* incoming message buffer: - * V2.x message takes the whole struct - * V1.x message is a bare lnet_hdr_t, it's stored - * in ksnc_msg.ksm_u.lnetmsg */ - - /* WRITER */ - struct list_head ksnc_tx_list; /* where I enq waiting for output space */ - struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - ksock_tx_t *ksnc_tx_mono; /* V2.x only, next mono-packet, mono-packet is : - * a. lnet packet without piggyback - * b. noop ZC-ACK packet */ - cfs_time_t ksnc_tx_deadline; /* when (in jiffies) tx times out */ - int ksnc_tx_bufnob; /* send buffer marker */ - atomic_t ksnc_tx_nob; /* # bytes queued */ - int ksnc_tx_ready; /* write space */ - int ksnc_tx_scheduled; /* being progressed */ - - struct ksock_protocol *ksnc_proto; /* protocol table for the connection */ - -#if !SOCKNAL_SINGLE_FRAG_RX - struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV]; -#endif -#if !SOCKNAL_SINGLE_FRAG_TX - struct iovec ksnc_tx_scratch_iov[LNET_MAX_IOV]; -#endif -} ksock_conn_t; - -typedef struct ksock_route -{ - struct list_head ksnr_list; /* chain on peer route list */ - struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */ - struct ksock_peer *ksnr_peer; /* owning peer */ - atomic_t ksnr_refcount; /* # users */ - cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */ - cfs_duration_t ksnr_retry_interval; /* how long between retries */ - __u32 ksnr_myipaddr; /* my IP */ - __u32 ksnr_ipaddr; /* IP address to connect to */ - int ksnr_port; /* port to connect to */ - unsigned int ksnr_scheduled:1; /* scheduled for attention */ - unsigned int ksnr_connecting:1; /* connection establishment in progress */ - unsigned 
int ksnr_connected:4; /* connections established by type */ - unsigned int ksnr_deleted:1; /* been removed from peer? */ - unsigned int ksnr_share_count; /* created explicitly? */ - int ksnr_conn_count; /* # conns established by this route */ - struct ksock_protocol *ksnr_proto ; /* protocol table for connecting */ -} ksock_route_t; - -typedef struct ksock_peer -{ - struct list_head ksnp_list; /* stash on global peer list */ - lnet_process_id_t ksnp_id; /* who's on the other end(s) */ - atomic_t ksnp_refcount; /* # users */ - int ksnp_sharecount; /* lconf usage counter */ - int ksnp_closing; /* being closed */ - int ksnp_accepting; /* # passive connections pending */ - int ksnp_error; /* errno on closing last conn */ - __u64 ksnp_zc_next_cookie;/* ZC completion cookie */ - struct list_head ksnp_conns; /* all active connections */ - struct list_head ksnp_routes; /* routes */ - struct list_head ksnp_tx_queue; /* waiting packets */ - spinlock_t ksnp_lock; /* serialize, NOT safe in g_lock */ - struct list_head ksnp_zc_req_list; /* zero copy requests wait for ACK */ - cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */ - lnet_ni_t *ksnp_ni; /* which network */ - int ksnp_n_passive_ips; /* # of... 
*/ - __u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; /* preferred local interfaces */ -} ksock_peer_t; - -typedef struct ksock_connreq -{ - struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */ - lnet_ni_t *ksncr_ni; /* chosen NI */ - cfs_socket_t *ksncr_sock; /* accepted socket */ -} ksock_connreq_t; - -extern ksock_nal_data_t ksocknal_data; -extern ksock_tunables_t ksocknal_tunables; - -typedef struct ksock_protocol -{ - int pro_version; /* version number of protocol */ - int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *); /* handshake function */ - int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int);/* handshake function */ - void (*pro_pack)(ksock_tx_t *); /* message pack */ - void (*pro_unpack)(ksock_msg_t *); /* message unpack */ -} ksock_protocol_t; - -extern ksock_protocol_t ksocknal_protocol_v1x; -extern ksock_protocol_t ksocknal_protocol_v2x; - -#define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR -#define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR -#define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR -#define KSOCK_PROTO_V2 2 - -static inline int -ksocknal_route_mask(void) -{ - if (!*ksocknal_tunables.ksnd_typed_conns) - return (1 << SOCKLND_CONN_ANY); - - return ((1 << SOCKLND_CONN_CONTROL) | - (1 << SOCKLND_CONN_BULK_IN) | - (1 << SOCKLND_CONN_BULK_OUT)); -} - -static inline struct list_head * -ksocknal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size; - - return (&ksocknal_data.ksnd_peers [hash]); -} - -static inline void -ksocknal_conn_addref (ksock_conn_t *conn) -{ - LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); - atomic_inc(&conn->ksnc_conn_refcount); -} - -extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn); - -static inline void -ksocknal_conn_decref (ksock_conn_t *conn) -{ - LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); - if (atomic_dec_and_test(&conn->ksnc_conn_refcount)) - ksocknal_queue_zombie_conn(conn); -} - -static inline int 
-ksocknal_connsock_addref (ksock_conn_t *conn) -{ - int rc = -ESHUTDOWN; - - read_lock (&ksocknal_data.ksnd_global_lock); - if (!conn->ksnc_closing) { - LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0); - atomic_inc(&conn->ksnc_sock_refcount); - rc = 0; - } - read_unlock (&ksocknal_data.ksnd_global_lock); - - return (rc); -} - -static inline void -ksocknal_connsock_decref (ksock_conn_t *conn) -{ - LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0); - if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) { - LASSERT (conn->ksnc_closing); - libcfs_sock_release(conn->ksnc_sock); - conn->ksnc_sock = NULL; - } -} - -static inline void -ksocknal_tx_addref (ksock_tx_t *tx) -{ - LASSERT (atomic_read(&tx->tx_refcount) > 0); - atomic_inc(&tx->tx_refcount); -} - -extern void ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx); - -static inline void -ksocknal_tx_decref (ksock_tx_t *tx) -{ - LASSERT (atomic_read(&tx->tx_refcount) > 0); - if (atomic_dec_and_test(&tx->tx_refcount)) - ksocknal_tx_done(NULL, tx); -} - -static inline void -ksocknal_route_addref (ksock_route_t *route) -{ - LASSERT (atomic_read(&route->ksnr_refcount) > 0); - atomic_inc(&route->ksnr_refcount); -} - -extern void ksocknal_destroy_route (ksock_route_t *route); - -static inline void -ksocknal_route_decref (ksock_route_t *route) -{ - LASSERT (atomic_read (&route->ksnr_refcount) > 0); - if (atomic_dec_and_test(&route->ksnr_refcount)) - ksocknal_destroy_route (route); -} - -static inline void -ksocknal_peer_addref (ksock_peer_t *peer) -{ - LASSERT (atomic_read (&peer->ksnp_refcount) > 0); - atomic_inc(&peer->ksnp_refcount); -} - -extern void ksocknal_destroy_peer (ksock_peer_t *peer); - -static inline void -ksocknal_peer_decref (ksock_peer_t *peer) -{ - LASSERT (atomic_read (&peer->ksnp_refcount) > 0); - if (atomic_dec_and_test(&peer->ksnp_refcount)) - ksocknal_destroy_peer (peer); -} - -int ksocknal_startup (lnet_ni_t *ni); -void ksocknal_shutdown (lnet_ni_t *ni); -int ksocknal_ctl(lnet_ni_t *ni, unsigned 
int cmd, void *arg); -int ksocknal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int ksocknal_accept(lnet_ni_t *ni, cfs_socket_t *sock); - -extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port); -extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id); -extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id); -extern void ksocknal_peer_failed (ksock_peer_t *peer); -extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route, - cfs_socket_t *sock, int type); -extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why); -extern void ksocknal_terminate_conn (ksock_conn_t *conn); -extern void ksocknal_destroy_conn (ksock_conn_t *conn); -extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation); -extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why); -extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr); - -extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn); -extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error); -extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive); -extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg); -extern void ksocknal_thread_fini (void); -extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer); -extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); -extern int ksocknal_scheduler (void *arg); -extern int ksocknal_connd (void *arg); -extern int ksocknal_reaper (void *arg); -extern ksock_protocol_t * ksocknal_compat_protocol(ksock_hello_msg_t *); -extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, - lnet_nid_t peer_nid, 
ksock_hello_msg_t *hello); -extern int ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, - ksock_hello_msg_t *hello, lnet_process_id_t *id, - __u64 *incarnation); -extern void ksocknal_read_callback(ksock_conn_t *conn); -extern void ksocknal_write_callback(ksock_conn_t *conn); - -extern int ksocknal_lib_zc_capable(cfs_socket_t *sock); -extern void ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn); -extern void ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn); -extern void ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn); -extern void ksocknal_lib_push_conn (ksock_conn_t *conn); -extern void ksocknal_lib_bind_irq (unsigned int irq); -extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn); -extern unsigned int ksocknal_lib_sock_irq (cfs_socket_t *sock); -extern int ksocknal_lib_setup_sock (cfs_socket_t *so); -extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx); -extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx); -extern void ksocknal_lib_eager_ack (ksock_conn_t *conn); -extern int ksocknal_lib_recv_iov (ksock_conn_t *conn); -extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn); -extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, - int *rxmem, int *nagle); - -extern int ksocknal_lib_tunables_init(void); -extern void ksocknal_lib_tunables_fini(void); - -extern void ksocknal_lib_csum_tx(ksock_tx_t *tx); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c deleted file mode 100644 index dfd20b7492560f21ba76e48781beff815dda009b..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ /dev/null @@ -1,2811 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. 
Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "socklnd.h" - -ksock_tx_t * -ksocknal_alloc_tx (int size) -{ - ksock_tx_t *tx = NULL; - - if (size == KSOCK_NOOP_TX_SIZE) { - /* searching for a noop tx in free list */ - spin_lock(&ksocknal_data.ksnd_tx_lock); - - if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) { - tx = list_entry(ksocknal_data.ksnd_idle_noop_txs.next, - ksock_tx_t, tx_list); - LASSERT(tx->tx_desc_size == size); - list_del(&tx->tx_list); - } - - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } - - if (tx == NULL) - LIBCFS_ALLOC(tx, size); - - if (tx == NULL) - return NULL; - - atomic_set(&tx->tx_refcount, 1); - tx->tx_desc_size = size; - atomic_inc(&ksocknal_data.ksnd_nactive_txs); - - return tx; -} - -void -ksocknal_free_tx (ksock_tx_t *tx) -{ - atomic_dec(&ksocknal_data.ksnd_nactive_txs); - - if (tx->tx_desc_size == KSOCK_NOOP_TX_SIZE) { - /* it's a noop tx */ - spin_lock(&ksocknal_data.ksnd_tx_lock); - - list_add(&tx->tx_list, &ksocknal_data.ksnd_idle_noop_txs); - - spin_unlock(&ksocknal_data.ksnd_tx_lock); - } else { - LIBCFS_FREE(tx, tx->tx_desc_size); - } -} - -void -ksocknal_init_msg(ksock_msg_t *msg, int type) -{ - msg->ksm_type = type; - 
msg->ksm_csum = 0; - msg->ksm_zc_req_cookie = 0; - msg->ksm_zc_ack_cookie = 0; -} - -int -ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct iovec *iov = tx->tx_iov; - int nob; - int rc; - - LASSERT (tx->tx_niov > 0); - - /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */ - rc = ksocknal_lib_send_iov(conn, tx); - - if (rc <= 0) /* sent nothing? */ - return (rc); - - nob = rc; - LASSERT (nob <= tx->tx_resid); - tx->tx_resid -= nob; - - /* "consume" iov */ - do { - LASSERT (tx->tx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob); - iov->iov_len -= nob; - return (rc); - } - - nob -= iov->iov_len; - tx->tx_iov = ++iov; - tx->tx_niov--; - } while (nob != 0); - - return (rc); -} - -int -ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - lnet_kiov_t *kiov = tx->tx_kiov; - int nob; - int rc; - - LASSERT (tx->tx_niov == 0); - LASSERT (tx->tx_nkiov > 0); - - /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */ - rc = ksocknal_lib_send_kiov(conn, tx); - - if (rc <= 0) /* sent nothing? */ - return (rc); - - nob = rc; - LASSERT (nob <= tx->tx_resid); - tx->tx_resid -= nob; - - /* "consume" kiov */ - do { - LASSERT(tx->tx_nkiov > 0); - - if (nob < kiov->kiov_len) { - kiov->kiov_offset += nob; - kiov->kiov_len -= nob; - return rc; - } - - nob -= kiov->kiov_len; - tx->tx_kiov = ++kiov; - tx->tx_nkiov--; - } while (nob != 0); - - return (rc); -} - -int -ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) -{ - int rc; - int bufnob; - - if (ksocknal_data.ksnd_stall_tx != 0) { - cfs_pause(cfs_time_seconds(ksocknal_data.ksnd_stall_tx)); - } - - LASSERT (tx->tx_resid != 0); - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - return (-ESHUTDOWN); - } - - do { - if (ksocknal_data.ksnd_enomem_tx > 0) { - /* testing... 
*/ - ksocknal_data.ksnd_enomem_tx--; - rc = -EAGAIN; - } else if (tx->tx_niov != 0) { - rc = ksocknal_send_iov (conn, tx); - } else { - rc = ksocknal_send_kiov (conn, tx); - } - - bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock); - if (rc > 0) /* sent something? */ - conn->ksnc_tx_bufnob += rc; /* account it */ - - if (bufnob < conn->ksnc_tx_bufnob) { - /* allocated send buffer bytes < computed; infer - * something got ACKed */ - conn->ksnc_tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_tx_bufnob = bufnob; - mb(); - } - - if (rc <= 0) { /* Didn't write anything? */ - ksock_sched_t *sched; - - if (rc == 0) /* some stacks return 0 instead of -EAGAIN */ - rc = -EAGAIN; - - if (rc != -EAGAIN) - break; - - /* Check if EAGAIN is due to memory pressure */ - - sched = conn->ksnc_scheduler; - spin_lock_bh (&sched->kss_lock); - - if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) && - !conn->ksnc_tx_ready) { - /* SOCK_NOSPACE is set when the socket fills - * and cleared in the write_space callback - * (which also sets ksnc_tx_ready). If - * SOCK_NOSPACE and ksnc_tx_ready are BOTH - * zero, I didn't fill the socket and - * write_space won't reschedule me, so I - * return -ENOMEM to get my caller to retry - * after a timeout */ - rc = -ENOMEM; - } - - spin_unlock_bh (&sched->kss_lock); - break; - } - - /* socket's wmem_queued now includes 'rc' bytes */ - atomic_sub (rc, &conn->ksnc_tx_nob); - rc = 0; - - } while (tx->tx_resid != 0); - - ksocknal_connsock_decref(conn); - return (rc); -} - -int -ksocknal_recv_iov (ksock_conn_t *conn) -{ - struct iovec *iov = conn->ksnc_rx_iov; - int nob; - int rc; - - LASSERT (conn->ksnc_rx_niov > 0); - - /* Never touch conn->ksnc_rx_iov or change connection - * status inside ksocknal_lib_recv_iov */ - rc = ksocknal_lib_recv_iov(conn); - - if (rc <= 0) - return (rc); - - /* received something... 
*/ - nob = rc; - - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_rx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; - - conn->ksnc_rx_nob_wanted -= nob; - conn->ksnc_rx_nob_left -= nob; - - do { - LASSERT (conn->ksnc_rx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_len -= nob; - iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob); - return (-EAGAIN); - } - - nob -= iov->iov_len; - conn->ksnc_rx_iov = ++iov; - conn->ksnc_rx_niov--; - } while (nob != 0); - - return (rc); -} - -int -ksocknal_recv_kiov (ksock_conn_t *conn) -{ - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - int nob; - int rc; - LASSERT (conn->ksnc_rx_nkiov > 0); - - /* Never touch conn->ksnc_rx_kiov or change connection - * status inside ksocknal_lib_recv_iov */ - rc = ksocknal_lib_recv_kiov(conn); - - if (rc <= 0) - return (rc); - - /* received something... */ - nob = rc; - - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_rx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; - - conn->ksnc_rx_nob_wanted -= nob; - conn->ksnc_rx_nob_left -= nob; - - do { - LASSERT (conn->ksnc_rx_nkiov > 0); - - if (nob < kiov->kiov_len) { - kiov->kiov_offset += nob; - kiov->kiov_len -= nob; - return -EAGAIN; - } - - nob -= kiov->kiov_len; - conn->ksnc_rx_kiov = ++kiov; - conn->ksnc_rx_nkiov--; - } while (nob != 0); - - return 1; -} - -int -ksocknal_receive (ksock_conn_t *conn) -{ - /* Return 1 on success, 0 on EOF, < 0 on error. - * Caller checks ksnc_rx_nob_wanted to determine - * progress/completion. 
*/ - int rc; - ENTRY; - - if (ksocknal_data.ksnd_stall_rx != 0) { - cfs_pause(cfs_time_seconds (ksocknal_data.ksnd_stall_rx)); - } - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - return (-ESHUTDOWN); - } - - for (;;) { - if (conn->ksnc_rx_niov != 0) - rc = ksocknal_recv_iov (conn); - else - rc = ksocknal_recv_kiov (conn); - - if (rc <= 0) { - /* error/EOF or partial receive */ - if (rc == -EAGAIN) { - rc = 1; - } else if (rc == 0 && conn->ksnc_rx_started) { - /* EOF in the middle of a message */ - rc = -EPROTO; - } - break; - } - - /* Completed a fragment */ - - if (conn->ksnc_rx_nob_wanted == 0) { - rc = 1; - break; - } - } - - ksocknal_connsock_decref(conn); - RETURN (rc); -} - -void -ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx) -{ - lnet_msg_t *lnetmsg = tx->tx_lnetmsg; - int rc = (tx->tx_resid == 0) ? 0 : -EIO; - ENTRY; - - LASSERT(ni != NULL || tx->tx_conn != NULL); - - if (tx->tx_conn != NULL) - ksocknal_conn_decref(tx->tx_conn); - - if (ni == NULL && tx->tx_conn != NULL) - ni = tx->tx_conn->ksnc_peer->ksnp_ni; - - ksocknal_free_tx (tx); - if (lnetmsg != NULL) /* KSOCK_MSG_NOOP go without lnetmsg */ - lnet_finalize (ni, lnetmsg, rc); - - EXIT; -} - -void -ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist, int error) -{ - ksock_tx_t *tx; - - while (!list_empty (txlist)) { - tx = list_entry (txlist->next, ksock_tx_t, tx_list); - - if (error && tx->tx_lnetmsg != NULL) { - CDEBUG (D_NETERROR, "Deleting packet type %d len %d %s->%s\n", - le32_to_cpu (tx->tx_lnetmsg->msg_hdr.type), - le32_to_cpu (tx->tx_lnetmsg->msg_hdr.payload_length), - libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)), - libcfs_nid2str(le64_to_cpu (tx->tx_lnetmsg->msg_hdr.dest_nid))); - } else if (error) { - CDEBUG (D_NETERROR, "Deleting noop packet\n"); - } - - list_del (&tx->tx_list); - - LASSERT (atomic_read(&tx->tx_refcount) == 1); - ksocknal_tx_done (ni, tx); - } -} - -static void -ksocknal_check_zc_req(ksock_tx_t *tx) 
-{ - ksock_conn_t *conn = tx->tx_conn; - ksock_peer_t *peer = conn->ksnc_peer; - lnet_kiov_t *kiov = tx->tx_kiov; - int nkiov = tx->tx_nkiov; - - /* Set tx_msg.ksm_zc_req_cookie to a unique non-zero cookie and add tx - * to ksnp_zc_req_list if some fragment of this message should be sent - * zero-copy. Our peer will send an ACK containing this cookie when - * she has received this message to tell us we can signal completion. - * tx_msg.ksm_zc_req_cookie remains non-zero while tx is on - * ksnp_zc_req_list. */ - - if (conn->ksnc_proto != &ksocknal_protocol_v2x || - !conn->ksnc_zc_capable) - return; - - while (nkiov > 0) { - if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag) - break; - --nkiov; - ++kiov; - } - - if (nkiov == 0) - return; - - /* assign cookie and queue tx to pending list, it will be released when - * a matching ack is received. See ksocknal_handle_zc_ack() */ - - ksocknal_tx_addref(tx); - - spin_lock(&peer->ksnp_lock); - - LASSERT (tx->tx_msg.ksm_zc_req_cookie == 0); - tx->tx_msg.ksm_zc_req_cookie = peer->ksnp_zc_next_cookie++; - list_add_tail(&tx->tx_zc_list, &peer->ksnp_zc_req_list); - - spin_unlock(&peer->ksnp_lock); -} - -static void -ksocknal_unzc_req(ksock_tx_t *tx) -{ - ksock_peer_t *peer = tx->tx_conn->ksnc_peer; - - spin_lock(&peer->ksnp_lock); - - if (tx->tx_msg.ksm_zc_req_cookie == 0) { - /* Not waiting for an ACK */ - spin_unlock(&peer->ksnp_lock); - return; - } - - tx->tx_msg.ksm_zc_req_cookie = 0; - list_del(&tx->tx_zc_list); - - spin_unlock(&peer->ksnp_lock); - - ksocknal_tx_decref(tx); -} - -int -ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) -{ - int rc; - - if (!tx->tx_checked_zc) { - tx->tx_checked_zc = 1; - ksocknal_check_zc_req(tx); - } - - rc = ksocknal_transmit (conn, tx); - - CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc); - - if (tx->tx_resid == 0) { - /* Sent everything OK */ - LASSERT (rc == 0); - - return (0); - } - - if (rc == -EAGAIN) - return (rc); - - if (rc == -ENOMEM) { - static int counter; 
- - counter++; /* exponential backoff warnings */ - if ((counter & (-counter)) == counter) - CWARN("%u ENOMEM tx %p (%u allocated)\n", - counter, conn, atomic_read(&libcfs_kmemory)); - - /* Queue on ksnd_enomem_conns for retry after a timeout */ - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - - /* enomem list takes over scheduler's ref... */ - LASSERT (conn->ksnc_tx_scheduled); - list_add_tail(&conn->ksnc_tx_list, - &ksocknal_data.ksnd_enomem_conns); - if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(), - SOCKNAL_ENOMEM_RETRY), - ksocknal_data.ksnd_reaper_waketime)) - cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - return (rc); - } - - /* Actual error */ - LASSERT (rc < 0); - - if (!conn->ksnc_closing) { - switch (rc) { - case -ECONNRESET: - LCONSOLE_WARN("Host %u.%u.%u.%u reset our connection " - "while we were sending data; it may have " - "rebooted.\n", - HIPQUAD(conn->ksnc_ipaddr)); - break; - default: - LCONSOLE_WARN("There was an unexpected network error " - "while writing to %u.%u.%u.%u: %d.\n", - HIPQUAD(conn->ksnc_ipaddr), rc); - break; - } - CDEBUG(D_NET, "[%p] Error %d on write to %s" - " ip %d.%d.%d.%d:%d\n", conn, rc, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - } - - ksocknal_unzc_req(tx); - - /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings (conn, - (conn->ksnc_closing) ? 
0 : rc); - - return (rc); -} - -void -ksocknal_launch_connection_locked (ksock_route_t *route) -{ - - /* called holding write lock on ksnd_global_lock */ - - LASSERT (!route->ksnr_scheduled); - LASSERT (!route->ksnr_connecting); - LASSERT ((ksocknal_route_mask() & ~route->ksnr_connected) != 0); - - route->ksnr_scheduled = 1; /* scheduling conn for connd */ - ksocknal_route_addref(route); /* extra ref for connd */ - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - - list_add_tail (&route->ksnr_connd_list, - &ksocknal_data.ksnd_connd_routes); - cfs_waitq_signal (&ksocknal_data.ksnd_connd_waitq); - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); -} - -ksock_conn_t * -ksocknal_find_conn_locked (int payload_nob, ksock_peer_t *peer) -{ - struct list_head *tmp; - ksock_conn_t *typed = NULL; - int tnob = 0; - ksock_conn_t *fallback = NULL; - int fnob = 0; - ksock_conn_t *conn; - - list_for_each (tmp, &peer->ksnp_conns) { - ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list); - int hdr_nob = 0; -#if SOCKNAL_ROUND_ROBIN - const int nob = 0; -#else - int nob = atomic_read(&c->ksnc_tx_nob) + - SOCK_WMEM_QUEUED(c->ksnc_sock); -#endif - LASSERT (!c->ksnc_closing); - LASSERT(c->ksnc_proto != NULL); - - if (fallback == NULL || nob < fnob) { - fallback = c; - fnob = nob; - } - - if (!*ksocknal_tunables.ksnd_typed_conns) - continue; - - if (payload_nob == 0) { - /* noop packet */ - hdr_nob = offsetof(ksock_msg_t, ksm_u); - } else { - /* lnet packet */ - hdr_nob = (c->ksnc_proto == &ksocknal_protocol_v2x)? 
- offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload): - sizeof(lnet_hdr_t); - } - - switch (c->ksnc_type) { - default: - CERROR("ksnc_type bad: %u\n", c->ksnc_type); - LBUG(); - case SOCKLND_CONN_ANY: - break; - case SOCKLND_CONN_BULK_IN: - continue; - case SOCKLND_CONN_BULK_OUT: - if ((hdr_nob + payload_nob) < *ksocknal_tunables.ksnd_min_bulk) - continue; - break; - case SOCKLND_CONN_CONTROL: - if ((hdr_nob + payload_nob) >= *ksocknal_tunables.ksnd_min_bulk) - continue; - break; - } - - if (typed == NULL || nob < tnob) { - typed = c; - tnob = nob; - } - } - - /* prefer the typed selection */ - conn = (typed != NULL) ? typed : fallback; - -#if SOCKNAL_ROUND_ROBIN - if (conn != NULL) { - /* round-robin all else being equal */ - list_del (&conn->ksnc_list); - list_add_tail (&conn->ksnc_list, &peer->ksnp_conns); - } -#endif - return conn; -} - -void -ksocknal_next_mono_tx(ksock_conn_t *conn) -{ - ksock_tx_t *tx = conn->ksnc_tx_mono; - - /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x); - LASSERT(!list_empty(&conn->ksnc_tx_queue)); - LASSERT(tx != NULL); - - if (tx->tx_list.next == &conn->ksnc_tx_queue) { - /* no more packets queued */ - conn->ksnc_tx_mono = NULL; - } else { - conn->ksnc_tx_mono = list_entry(tx->tx_list.next, ksock_tx_t, tx_list); - LASSERT(conn->ksnc_tx_mono->tx_msg.ksm_type == tx->tx_msg.ksm_type); - } -} - -int -ksocknal_piggyback_zcack(ksock_conn_t *conn, __u64 cookie) -{ - ksock_tx_t *tx = conn->ksnc_tx_mono; - - /* Called holding BH lock: conn->ksnc_scheduler->kss_lock */ - - if (tx == NULL) - return 0; - - if (tx->tx_msg.ksm_type == KSOCK_MSG_NOOP) { - /* tx is noop zc-ack, can't piggyback zc-ack cookie */ - return 0; - } - - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_LNET); - LASSERT(tx->tx_msg.ksm_zc_ack_cookie == 0); - - /* piggyback the zc-ack cookie */ - tx->tx_msg.ksm_zc_ack_cookie = cookie; - ksocknal_next_mono_tx(conn); - - return 1; -} - -void -ksocknal_queue_tx_locked 
(ksock_tx_t *tx, ksock_conn_t *conn) -{ - ksock_sched_t *sched = conn->ksnc_scheduler; - ksock_msg_t *msg = &tx->tx_msg; - ksock_tx_t *ztx; - int bufnob = 0; - - /* called holding global lock (read or irq-write) and caller may - * not have dropped this lock between finding conn and calling me, - * so we don't need the {get,put}connsock dance to deref - * ksnc_sock... */ - LASSERT(!conn->ksnc_closing); - - CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - - tx->tx_checked_zc = 0; - conn->ksnc_proto->pro_pack(tx); - - /* Ensure the frags we've been given EXACTLY match the number of - * bytes we want to send. Many TCP/IP stacks disregard any total - * size parameters passed to them and just look at the frags. - * - * We always expect at least 1 mapped fragment containing the - * complete ksocknal message header. */ - LASSERT (lnet_iov_nob (tx->tx_niov, tx->tx_iov) + - lnet_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob); - LASSERT (tx->tx_niov >= 1); - LASSERT (tx->tx_resid == tx->tx_nob); - - CDEBUG (D_NET, "Packet %p type %d, nob %d niov %d nkiov %d\n", - tx, (tx->tx_lnetmsg != NULL)? tx->tx_lnetmsg->msg_hdr.type: - KSOCK_MSG_NOOP, - tx->tx_nob, tx->tx_niov, tx->tx_nkiov); - - atomic_add (tx->tx_nob, &conn->ksnc_tx_nob); - tx->tx_conn = conn; - ksocknal_conn_addref(conn); /* +1 ref for tx */ - - /* - * NB Darwin: SOCK_WMEM_QUEUED()->sock_getsockopt() will take - * a blockable lock(socket lock), so SOCK_WMEM_QUEUED can't be - * put in spinlock. 
- */ - bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock); - spin_lock_bh (&sched->kss_lock); - - if (list_empty(&conn->ksnc_tx_queue) && bufnob == 0) { - /* First packet starts the timeout */ - conn->ksnc_tx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - conn->ksnc_tx_bufnob = 0; - mb(); /* order with adding to tx_queue */ - } - - ztx = NULL; - - if (msg->ksm_type == KSOCK_MSG_NOOP) { - /* The packet is noop ZC ACK, try to piggyback the ack_cookie - * on a normal packet so I don't need to send it */ - LASSERT(msg->ksm_zc_req_cookie == 0); - LASSERT(msg->ksm_zc_ack_cookie != 0); - - if (conn->ksnc_tx_mono != NULL) { - if (ksocknal_piggyback_zcack(conn, msg->ksm_zc_ack_cookie)) { - /* zc-ack cookie is piggybacked */ - atomic_sub (tx->tx_nob, &conn->ksnc_tx_nob); - ztx = tx; /* Put to freelist later */ - } else { - /* no packet can piggyback zc-ack cookie */ - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } - } else { - /* It's the first mono-packet */ - conn->ksnc_tx_mono = tx; - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } - - } else { - /* It's a normal packet - can it piggback a noop zc-ack that - * has been queued already? 
*/ - LASSERT(msg->ksm_zc_ack_cookie == 0); - - if (conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x packet */ - conn->ksnc_tx_mono != NULL) { - if (conn->ksnc_tx_mono->tx_msg.ksm_type == KSOCK_MSG_NOOP) { - /* There is a noop zc-ack can be piggybacked */ - ztx = conn->ksnc_tx_mono; - - msg->ksm_zc_ack_cookie = ztx->tx_msg.ksm_zc_ack_cookie; - ksocknal_next_mono_tx(conn); - - /* use tx to replace the noop zc-ack packet, ztx will - * be put to freelist later */ - list_add(&tx->tx_list, &ztx->tx_list); - list_del(&ztx->tx_list); - - atomic_sub (ztx->tx_nob, &conn->ksnc_tx_nob); - } else { - /* no noop zc-ack packet, just enqueue it */ - LASSERT(conn->ksnc_tx_mono->tx_msg.ksm_type == KSOCK_MSG_LNET); - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } - - } else if (conn->ksnc_proto == &ksocknal_protocol_v2x) { - /* it's the first mono-packet, enqueue it */ - conn->ksnc_tx_mono = tx; - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } else { - /* V1.x packet, just enqueue it */ - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - } - } - - if (ztx != NULL) - list_add_tail(&ztx->tx_list, &sched->kss_zombie_noop_txs); - - if (conn->ksnc_tx_ready && /* able to send */ - !conn->ksnc_tx_scheduled) { /* not scheduled to send */ - /* +1 ref for scheduler */ - ksocknal_conn_addref(conn); - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - cfs_waitq_signal (&sched->kss_waitq); - } - - spin_unlock_bh (&sched->kss_lock); -} - -ksock_route_t * -ksocknal_find_connectable_route_locked (ksock_peer_t *peer) -{ - struct list_head *tmp; - ksock_route_t *route; - - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); - - LASSERT (!route->ksnr_connecting || route->ksnr_scheduled); - - if (route->ksnr_scheduled) /* connections being established */ - continue; - - /* all route types connected ? 
*/ - if ((ksocknal_route_mask() & ~route->ksnr_connected) == 0) - continue; - - /* too soon to retry this guy? */ - if (!(route->ksnr_retry_interval == 0 || /* first attempt */ - cfs_time_aftereq (cfs_time_current(), - route->ksnr_timeout))) - continue; - - return (route); - } - - return (NULL); -} - -ksock_route_t * -ksocknal_find_connecting_route_locked (ksock_peer_t *peer) -{ - struct list_head *tmp; - ksock_route_t *route; - - list_for_each (tmp, &peer->ksnp_routes) { - route = list_entry (tmp, ksock_route_t, ksnr_list); - - LASSERT (!route->ksnr_connecting || route->ksnr_scheduled); - - if (route->ksnr_scheduled) - return (route); - } - - return (NULL); -} - -int -ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id) -{ - ksock_peer_t *peer; - ksock_conn_t *conn; - ksock_route_t *route; - rwlock_t *g_lock; - int retry; - int rc; - - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lnetmsg != NULL); - - g_lock = &ksocknal_data.ksnd_global_lock; - - for (retry = 0;; retry = 1) { -#if !SOCKNAL_ROUND_ROBIN - read_lock (g_lock); - peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) { - if (ksocknal_find_connectable_route_locked(peer) == NULL) { - conn = ksocknal_find_conn_locked (tx->tx_lnetmsg->msg_len, peer); - if (conn != NULL) { - /* I've got no routes that need to be - * connecting and I do have an actual - * connection... */ - ksocknal_queue_tx_locked (tx, conn); - read_unlock (g_lock); - return (0); - } - } - } - - /* I'll need a write lock... 
*/ - read_unlock (g_lock); -#endif - write_lock_bh (g_lock); - - peer = ksocknal_find_peer_locked(ni, id); - if (peer != NULL) - break; - - write_unlock_bh (g_lock); - - if ((id.pid & LNET_PID_USERFLAG) != 0) { - CERROR("Refusing to create a connection to " - "userspace process %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; - } - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_id2str(id)); - return -EHOSTUNREACH; - } - - rc = ksocknal_add_peer(ni, id, - LNET_NIDADDR(id.nid), - lnet_acceptor_port()); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_id2str(id), rc); - return rc; - } - } - - for (;;) { - /* launch any/all connections that need it */ - route = ksocknal_find_connectable_route_locked (peer); - if (route == NULL) - break; - - ksocknal_launch_connection_locked (route); - } - - conn = ksocknal_find_conn_locked (tx->tx_lnetmsg->msg_len, peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - ksocknal_queue_tx_locked (tx, conn); - write_unlock_bh (g_lock); - return (0); - } - - if (peer->ksnp_accepting > 0 || - ksocknal_find_connecting_route_locked (peer) != NULL) { - /* Queue the message until a connection is established */ - list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue); - write_unlock_bh (g_lock); - return 0; - } - - write_unlock_bh (g_lock); - - /* NB Routes may be ignored if connections to them failed recently */ - CDEBUG(D_NETERROR, "No usable routes to %s\n", libcfs_id2str(id)); - return (-EHOSTUNREACH); -} - -int -ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - ksock_tx_t *tx; - int desc_size; - int rc; - - /* NB 'private' is different depending on what we're 
sending. - * Just ignore it... */ - - CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - LASSERT (!in_interrupt ()); - - if (payload_iov != NULL) - desc_size = offsetof(ksock_tx_t, - tx_frags.virt.iov[1 + payload_niov]); - else - desc_size = offsetof(ksock_tx_t, - tx_frags.paged.kiov[payload_niov]); - - tx = ksocknal_alloc_tx(desc_size); - if (tx == NULL) { - CERROR("Can't allocate tx desc type %d size %d\n", - type, desc_size); - return (-ENOMEM); - } - - tx->tx_conn = NULL; /* set when assigned a conn */ - tx->tx_lnetmsg = lntmsg; - - if (payload_iov != NULL) { - tx->tx_kiov = NULL; - tx->tx_nkiov = 0; - tx->tx_iov = tx->tx_frags.virt.iov; - tx->tx_niov = 1 + - lnet_extract_iov(payload_niov, &tx->tx_iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); - } else { - tx->tx_niov = 1; - tx->tx_iov = &tx->tx_frags.paged.iov; - tx->tx_kiov = tx->tx_frags.paged.kiov; - tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov, - payload_niov, payload_kiov, - payload_offset, payload_nob); - } - - ksocknal_init_msg(&tx->tx_msg, KSOCK_MSG_LNET); - - /* The first fragment will be set later in pro_pack */ - rc = ksocknal_launch_packet(ni, tx, target); - if (rc == 0) - return (0); - - ksocknal_free_tx(tx); - return (-EIO); -} - -int -ksocknal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = cfs_kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads++; - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - return (0); -} - -void -ksocknal_thread_fini (void) -{ - write_lock_bh (&ksocknal_data.ksnd_global_lock); - ksocknal_data.ksnd_nthreads--; - write_unlock_bh (&ksocknal_data.ksnd_global_lock); -} 
- -int -ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) -{ - static char ksocknal_slop_buffer[4096]; - - int nob; - unsigned int niov; - int skipped; - - LASSERT(conn->ksnc_proto != NULL); - - if ((*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0) { - /* Remind the socket to ack eagerly... */ - ksocknal_lib_eager_ack(conn); - } - - if (nob_to_skip == 0) { /* right at next packet boundary now */ - conn->ksnc_rx_started = 0; - mb (); /* racing with timeout thread */ - - switch (conn->ksnc_proto->pro_version) { - case KSOCK_PROTO_V2: - conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER; - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg; - - if (conn->ksnc_type == SOCKLND_CONN_BULK_IN) { - /* always expect lnet_hdr_t to avoid extra-read for better performance */ - conn->ksnc_rx_nob_wanted = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload); - conn->ksnc_rx_nob_left = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload); - conn->ksnc_rx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload); - - } else { - /* can't make sure if it's noop or not */ - conn->ksnc_rx_nob_wanted = offsetof(ksock_msg_t, ksm_u); - conn->ksnc_rx_nob_left = offsetof(ksock_msg_t, ksm_u); - conn->ksnc_rx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u); - } - break; - - case KSOCK_PROTO_V1: - /* Receiving bare lnet_hdr_t */ - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_wanted = sizeof(lnet_hdr_t); - conn->ksnc_rx_nob_left = sizeof(lnet_hdr_t); - - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof (lnet_hdr_t); - break; - - default: - LBUG (); - } - conn->ksnc_rx_niov = 1; - - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_csum = ~0; - return (1); - } - - /* Set up to skip as much as possible now. 
If there's more left - * (ran out of iov entries) we'll get called again */ - - conn->ksnc_rx_state = SOCKNAL_RX_SLOP; - conn->ksnc_rx_nob_left = nob_to_skip; - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - skipped = 0; - niov = 0; - - do { - nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer)); - - conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer; - conn->ksnc_rx_iov[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -=nob; - - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec)); - - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_nob_wanted = skipped; - return (0); -} - -/* (Sink) handle incoming ZC request from sender */ -static int -ksocknal_handle_zc_req(ksock_peer_t *peer, __u64 cookie) -{ - ksock_conn_t *conn; - ksock_tx_t *tx; - ksock_sched_t *sched; - int rc; - - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = ksocknal_find_conn_locked (0, peer); - if (conn == NULL) { - read_unlock (&ksocknal_data.ksnd_global_lock); - CERROR("Can't find connection to send zcack.\n"); - return -ECONNRESET; - } - - sched = conn->ksnc_scheduler; - - spin_lock_bh (&sched->kss_lock); - rc = ksocknal_piggyback_zcack(conn, cookie); - spin_unlock_bh (&sched->kss_lock); - - read_unlock (&ksocknal_data.ksnd_global_lock); - if (rc) { - /* Ack cookie is piggybacked */ - return 0; - } - - tx = ksocknal_alloc_tx(KSOCK_NOOP_TX_SIZE); - if (tx == NULL) { - CERROR("Can't allocate noop tx desc\n"); - return -ENOMEM; - } - - tx->tx_conn = NULL; - tx->tx_lnetmsg = NULL; - tx->tx_kiov = NULL; - tx->tx_nkiov = 0; - tx->tx_iov = tx->tx_frags.virt.iov; - tx->tx_niov = 1; - - ksocknal_init_msg(&tx->tx_msg, KSOCK_MSG_NOOP); - tx->tx_msg.ksm_zc_ack_cookie = cookie; /* incoming cookie */ - - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = ksocknal_find_conn_locked (0, peer); - if (conn == NULL) { - read_unlock 
(&ksocknal_data.ksnd_global_lock); - ksocknal_free_tx(tx); - CERROR("Can't find connection to send zcack.\n"); - return -ECONNRESET; - } - ksocknal_queue_tx_locked(tx, conn); - - read_unlock (&ksocknal_data.ksnd_global_lock); - - return 0; -} - -/* (Sender) handle ZC_ACK from sink */ -static int -ksocknal_handle_zc_ack(ksock_peer_t *peer, __u64 cookie) -{ - ksock_tx_t *tx; - struct list_head *ctmp; - - spin_lock(&peer->ksnp_lock); - - list_for_each(ctmp, &peer->ksnp_zc_req_list) { - tx = list_entry (ctmp, ksock_tx_t, tx_zc_list); - if (tx->tx_msg.ksm_zc_req_cookie != cookie) - continue; - - tx->tx_msg.ksm_zc_req_cookie = 0; - list_del(&tx->tx_zc_list); - - spin_unlock(&peer->ksnp_lock); - - ksocknal_tx_decref(tx); - return 0; - } - spin_unlock(&peer->ksnp_lock); - - return -EPROTO; -} - -int -ksocknal_process_receive (ksock_conn_t *conn) -{ - int rc; - - LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0); - - /* NB: sched lock NOT held */ - /* SOCKNAL_RX_LNET_HEADER is here for backward compatability */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_KSM_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_SLOP); - again: - if (conn->ksnc_rx_nob_wanted != 0) { - rc = ksocknal_receive(conn); - - if (rc <= 0) { - LASSERT (rc != -EAGAIN); - - if (rc == 0) - CDEBUG (D_NET, "[%p] EOF from %s" - " ip %d.%d.%d.%d:%d\n", conn, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - else if (!conn->ksnc_closing) - CERROR ("[%p] Error %d on read from %s" - " ip %d.%d.%d.%d:%d\n", - conn, rc, - libcfs_id2str(conn->ksnc_peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - - /* it's not an error if conn is being closed */ - ksocknal_close_conn_and_siblings (conn, - (conn->ksnc_closing) ? 0 : rc); - return (rc == 0 ? 
-ESHUTDOWN : rc); - } - - if (conn->ksnc_rx_nob_wanted != 0) { - /* short read */ - return (-EAGAIN); - } - } - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_KSM_HEADER: - if (conn->ksnc_flip) { - __swab32s(&conn->ksnc_msg.ksm_type); - __swab32s(&conn->ksnc_msg.ksm_csum); - __swab64s(&conn->ksnc_msg.ksm_zc_req_cookie); - __swab64s(&conn->ksnc_msg.ksm_zc_ack_cookie); - } - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP && - conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - /* NOOP Checksum error */ - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return (-EIO); - } - - if (conn->ksnc_msg.ksm_zc_ack_cookie != 0) { - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x); - - rc = ksocknal_handle_zc_ack(conn->ksnc_peer, - conn->ksnc_msg.ksm_zc_ack_cookie); - if (rc != 0) { - CERROR("%s: Unknown zero copy ACK cookie: "LPU64"\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_zc_ack_cookie); - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings(conn, -EPROTO); - return (rc); - } - } - - if (conn->ksnc_msg.ksm_type == KSOCK_MSG_NOOP) { - ksocknal_new_packet (conn, 0); - return 0; /* NOOP is done and just return */ - } - LASSERT (conn->ksnc_msg.ksm_type == KSOCK_MSG_LNET); - - if (conn->ksnc_type == SOCKLND_CONN_BULK_IN) { - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - /* has read lnet_hdr_t already (re ksocknal_new_packet), fall through */ - } else { - conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_wanted = sizeof(ksock_lnet_msg_t); - conn->ksnc_rx_nob_left = sizeof(ksock_lnet_msg_t); - - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof(ksock_lnet_msg_t); - - 
conn->ksnc_rx_niov = 1; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - - goto again; /* read lnet header now */ - } - - case SOCKNAL_RX_LNET_HEADER: - /* unpack message header */ - conn->ksnc_proto->pro_unpack(&conn->ksnc_msg); - - if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) { - /* Userspace peer */ - lnet_process_id_t *id = &conn->ksnc_peer->ksnp_id; - lnet_hdr_t *lhdr = &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr; - - /* Substitute process ID assigned at connection time */ - lhdr->src_pid = cpu_to_le32(id->pid); - lhdr->src_nid = cpu_to_le64(id->nid); - } - - conn->ksnc_rx_state = SOCKNAL_RX_PARSE; - ksocknal_conn_addref(conn); /* ++ref while parsing */ - - rc = lnet_parse(conn->ksnc_peer->ksnp_ni, - &conn->ksnc_msg.ksm_u.lnetmsg.ksnm_hdr, - conn->ksnc_peer->ksnp_id.nid, conn, 0); - if (rc < 0) { - /* I just received garbage: give up on this conn */ - ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings (conn, rc); - ksocknal_conn_decref(conn); - return (-EPROTO); - } - - /* I'm racing with ksocknal_recv() */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE || - conn->ksnc_rx_state == SOCKNAL_RX_LNET_PAYLOAD); - - if (conn->ksnc_rx_state != SOCKNAL_RX_LNET_PAYLOAD) - return 0; - - /* ksocknal_recv() got called */ - goto again; - - case SOCKNAL_RX_LNET_PAYLOAD: - /* payload all received */ - rc = 0; - - if (conn->ksnc_rx_nob_left == 0 && /* not truncating */ - conn->ksnc_msg.ksm_csum != 0 && /* has checksum */ - conn->ksnc_msg.ksm_csum != conn->ksnc_rx_csum) { - CERROR("%s: Checksum error, wire:0x%08X data:0x%08X\n", - libcfs_id2str(conn->ksnc_peer->ksnp_id), - conn->ksnc_msg.ksm_csum, conn->ksnc_rx_csum); - rc = -EIO; - } - - lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, rc); - - if (rc == 0 && conn->ksnc_msg.ksm_zc_req_cookie != 0) { - LASSERT(conn->ksnc_proto == &ksocknal_protocol_v2x); - rc = ksocknal_handle_zc_req(conn->ksnc_peer, - conn->ksnc_msg.ksm_zc_req_cookie); - } - - if (rc != 0) { - 
ksocknal_new_packet(conn, 0); - ksocknal_close_conn_and_siblings (conn, rc); - return (-EPROTO); - } - /* Fall through */ - - case SOCKNAL_RX_SLOP: - /* starting new packet? */ - if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left)) - return 0; /* come back later */ - goto again; /* try to finish reading slop now */ - - default: - break; - } - - /* Not Reached */ - LBUG (); - return (-EINVAL); /* keep gcc happy */ -} - -int -ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - ksock_conn_t *conn = (ksock_conn_t *)private; - ksock_sched_t *sched = conn->ksnc_scheduler; - - LASSERT (mlen <= rlen); - LASSERT (niov <= LNET_MAX_IOV); - - conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = mlen; - conn->ksnc_rx_nob_left = rlen; - - if (mlen == 0 || iov != NULL) { - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; - conn->ksnc_rx_niov = - lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov, - niov, iov, offset, mlen); - } else { - conn->ksnc_rx_niov = 0; - conn->ksnc_rx_iov = NULL; - conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; - conn->ksnc_rx_nkiov = - lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov, - niov, kiov, offset, mlen); - } - - LASSERT (mlen == - lnet_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lnet_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); - - LASSERT (conn->ksnc_rx_scheduled); - - spin_lock_bh (&sched->kss_lock); - - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_PARSE_WAIT: - list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns); - cfs_waitq_signal (&sched->kss_waitq); - LASSERT (conn->ksnc_rx_ready); - break; - - case SOCKNAL_RX_PARSE: - /* scheduler hasn't noticed I'm parsing yet */ - break; - } - - conn->ksnc_rx_state = SOCKNAL_RX_LNET_PAYLOAD; - - spin_unlock_bh (&sched->kss_lock); - ksocknal_conn_decref(conn); - return 
(0); -} - -static inline int -ksocknal_sched_cansleep(ksock_sched_t *sched) -{ - int rc; - - spin_lock_bh (&sched->kss_lock); - - rc = (!ksocknal_data.ksnd_shuttingdown && - list_empty(&sched->kss_rx_conns) && - list_empty(&sched->kss_tx_conns)); - - spin_unlock_bh (&sched->kss_lock); - return (rc); -} - -int ksocknal_scheduler (void *arg) -{ - ksock_sched_t *sched = (ksock_sched_t *)arg; - ksock_conn_t *conn; - ksock_tx_t *tx; - int rc; - int nloops = 0; - int id = sched - ksocknal_data.ksnd_schedulers; - char name[16]; - - snprintf (name, sizeof (name),"socknal_sd%02d", id); - cfs_daemonize (name); - cfs_block_allsigs (); - -#if defined(CONFIG_SMP) && defined(CPU_AFFINITY) - id = ksocknal_sched2cpu(id); - if (cpu_online(id)) { - cpumask_t m; - cpu_set(id, m); - set_cpus_allowed(current, m); - } else { - CERROR ("Can't set CPU affinity for %s to %d\n", name, id); - } -#endif /* CONFIG_SMP && CPU_AFFINITY */ - - spin_lock_bh (&sched->kss_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - int did_something = 0; - - /* Ensure I progress everything semi-fairly */ - - if (!list_empty (&sched->kss_rx_conns)) { - conn = list_entry(sched->kss_rx_conns.next, - ksock_conn_t, ksnc_rx_list); - list_del(&conn->ksnc_rx_list); - - LASSERT(conn->ksnc_rx_scheduled); - LASSERT(conn->ksnc_rx_ready); - - /* clear rx_ready in case receive isn't complete. - * Do it BEFORE we call process_recv, since - * data_ready can set it any time after we release - * kss_lock. */ - conn->ksnc_rx_ready = 0; - spin_unlock_bh (&sched->kss_lock); - - rc = ksocknal_process_receive(conn); - - spin_lock_bh (&sched->kss_lock); - - /* I'm the only one that can clear this flag */ - LASSERT(conn->ksnc_rx_scheduled); - - /* Did process_receive get everything it wanted? 
*/ - if (rc == 0) - conn->ksnc_rx_ready = 1; - - if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) { - /* Conn blocked waiting for ksocknal_recv() - * I change its state (under lock) to signal - * it can be rescheduled */ - conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT; - } else if (conn->ksnc_rx_ready) { - /* reschedule for rx */ - list_add_tail (&conn->ksnc_rx_list, - &sched->kss_rx_conns); - } else { - conn->ksnc_rx_scheduled = 0; - /* drop my ref */ - ksocknal_conn_decref(conn); - } - - did_something = 1; - } - - if (!list_empty (&sched->kss_tx_conns)) { - CFS_LIST_HEAD (zlist); - - if (!list_empty(&sched->kss_zombie_noop_txs)) { - list_add(&zlist, &sched->kss_zombie_noop_txs); - list_del_init(&sched->kss_zombie_noop_txs); - } - - conn = list_entry(sched->kss_tx_conns.next, - ksock_conn_t, ksnc_tx_list); - list_del (&conn->ksnc_tx_list); - - LASSERT(conn->ksnc_tx_scheduled); - LASSERT(conn->ksnc_tx_ready); - LASSERT(!list_empty(&conn->ksnc_tx_queue)); - - tx = list_entry(conn->ksnc_tx_queue.next, - ksock_tx_t, tx_list); - - if (conn->ksnc_tx_mono == tx) - ksocknal_next_mono_tx(conn); - - /* dequeue now so empty list => more to send */ - list_del(&tx->tx_list); - - /* Clear tx_ready in case send isn't complete. Do - * it BEFORE we call process_transmit, since - * write_space can set it any time after we release - * kss_lock. 
*/ - conn->ksnc_tx_ready = 0; - spin_unlock_bh (&sched->kss_lock); - - if (!list_empty(&zlist)) { - /* free zombie noop txs, it's fast because - * noop txs are just put in freelist */ - ksocknal_txlist_done(NULL, &zlist, 0); - } - - rc = ksocknal_process_transmit(conn, tx); - - if (rc == -ENOMEM || rc == -EAGAIN) { - /* Incomplete send: replace tx on HEAD of tx_queue */ - spin_lock_bh (&sched->kss_lock); - list_add (&tx->tx_list, &conn->ksnc_tx_queue); - } else { - /* Complete send; tx -ref */ - ksocknal_tx_decref (tx); - - spin_lock_bh (&sched->kss_lock); - /* assume space for more */ - conn->ksnc_tx_ready = 1; - } - - if (rc == -ENOMEM) { - /* Do nothing; after a short timeout, this - * conn will be reposted on kss_tx_conns. */ - } else if (conn->ksnc_tx_ready && - !list_empty (&conn->ksnc_tx_queue)) { - /* reschedule for tx */ - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - } else { - conn->ksnc_tx_scheduled = 0; - /* drop my ref */ - ksocknal_conn_decref(conn); - } - - did_something = 1; - } - if (!did_something || /* nothing to do */ - ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */ - spin_unlock_bh (&sched->kss_lock); - - nloops = 0; - - if (!did_something) { /* wait for something to do */ - rc = wait_event_interruptible_exclusive( - sched->kss_waitq, - !ksocknal_sched_cansleep(sched)); - LASSERT (rc == 0); - } else { - our_cond_resched(); - } - - spin_lock_bh (&sched->kss_lock); - } - } - - spin_unlock_bh (&sched->kss_lock); - ksocknal_thread_fini (); - return (0); -} - -/* - * Add connection to kss_rx_conns of scheduler - * and wakeup the scheduler. 
- */ -void ksocknal_read_callback (ksock_conn_t *conn) -{ - ksock_sched_t *sched; - ENTRY; - - sched = conn->ksnc_scheduler; - - spin_lock_bh (&sched->kss_lock); - - conn->ksnc_rx_ready = 1; - - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - cfs_waitq_signal (&sched->kss_waitq); - } - spin_unlock_bh (&sched->kss_lock); - - EXIT; -} - -/* - * Add connection to kss_tx_conns of scheduler - * and wakeup the scheduler. - */ -void ksocknal_write_callback (ksock_conn_t *conn) -{ - ksock_sched_t *sched; - ENTRY; - - sched = conn->ksnc_scheduler; - - spin_lock_bh (&sched->kss_lock); - - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && // not being progressed - !list_empty(&conn->ksnc_tx_queue)){//packets to send - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - ksocknal_conn_addref(conn); - - cfs_waitq_signal (&sched->kss_waitq); - } - - spin_unlock_bh (&sched->kss_lock); - - EXIT; -} - -ksock_protocol_t * -ksocknal_compat_protocol (ksock_hello_msg_t *hello) -{ - if ((hello->kshm_magic == LNET_PROTO_MAGIC && - hello->kshm_version == KSOCK_PROTO_V2) || - (hello->kshm_magic == __swab32(LNET_PROTO_MAGIC) && - hello->kshm_version == __swab32(KSOCK_PROTO_V2))) - return &ksocknal_protocol_v2x; - - if (hello->kshm_magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { - lnet_magicversion_t *hmv = (lnet_magicversion_t *)hello; - - CLASSERT (sizeof (lnet_magicversion_t) == - offsetof (ksock_hello_msg_t, kshm_src_nid)); - - if (hmv->version_major == cpu_to_le16 (KSOCK_PROTO_V1_MAJOR) && - hmv->version_minor == cpu_to_le16 (KSOCK_PROTO_V1_MINOR)) - return &ksocknal_protocol_v1x; - } - - return NULL; -} - -static int -ksocknal_send_hello_v1 (ksock_conn_t *conn, ksock_hello_msg_t *hello) -{ - cfs_socket_t *sock = conn->ksnc_sock; - 
lnet_hdr_t *hdr; - lnet_magicversion_t *hmv; - int rc; - int i; - - CLASSERT(sizeof(lnet_magicversion_t) == offsetof(lnet_hdr_t, src_nid)); - - LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { - CERROR("Can't allocate lnet_hdr_t\n"); - return -ENOMEM; - } - - hmv = (lnet_magicversion_t *)&hdr->dest_nid; - - /* Re-organize V2.x message header to V1.x (lnet_hdr_t) - * header and send out */ - hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC); - hmv->version_major = cpu_to_le16 (KSOCK_PROTO_V1_MAJOR); - hmv->version_minor = cpu_to_le16 (KSOCK_PROTO_V1_MINOR); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - hmv->version_major++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - hmv->magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - hdr->src_nid = cpu_to_le64 (hello->kshm_src_nid); - hdr->src_pid = cpu_to_le32 (hello->kshm_src_pid); - hdr->type = cpu_to_le32 (LNET_MSG_HELLO); - hdr->payload_length = cpu_to_le32 (hello->kshm_nips * sizeof(__u32)); - hdr->msg.hello.type = cpu_to_le32 (hello->kshm_ctype); - hdr->msg.hello.incarnation = cpu_to_le64 (hello->kshm_src_incarnation); - - rc = libcfs_sock_write(sock, hdr, sizeof(*hdr), lnet_acceptor_timeout()); - - if (rc != 0) { - CDEBUG (D_NETERROR, "Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", - rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - goto out; - } - - if (hello->kshm_nips == 0) - goto out; - - for (i = 0; i < hello->kshm_nips; i++) { - hello->kshm_ips[i] = __cpu_to_le32 (hello->kshm_ips[i]); - } - - rc = libcfs_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc != 0) { - CDEBUG (D_NETERROR, "Error %d sending HELLO payload (%d)" - " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips, - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - } -out: - LIBCFS_FREE(hdr, 
sizeof(*hdr)); - - return rc; -} - -static int -ksocknal_send_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello) -{ - cfs_socket_t *sock = conn->ksnc_sock; - int rc; - - hello->kshm_magic = LNET_PROTO_MAGIC; - hello->kshm_version = KSOCK_PROTO_V2; - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - hello->kshm_version++; /* just different! */ - the_lnet.ln_testprotocompat &= ~1; - } - LNET_UNLOCK(); - } - - rc = libcfs_sock_write(sock, hello, offsetof(ksock_hello_msg_t, kshm_ips), - lnet_acceptor_timeout()); - - if (rc != 0) { - CDEBUG (D_NETERROR, "Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n", - rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - return rc; - } - - if (hello->kshm_nips == 0) - return 0; - - rc = libcfs_sock_write(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), - lnet_acceptor_timeout()); - if (rc != 0) { - CDEBUG (D_NETERROR, "Error %d sending HELLO payload (%d)" - " to %u.%u.%u.%u/%d\n", rc, hello->kshm_nips, - HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port); - } - - return rc; -} - -static int -ksocknal_recv_hello_v1(ksock_conn_t *conn, ksock_hello_msg_t *hello,int timeout) -{ - cfs_socket_t *sock = conn->ksnc_sock; - lnet_hdr_t *hdr; - int rc; - int i; - - LIBCFS_ALLOC(hdr, sizeof(*hdr)); - if (hdr == NULL) { - CERROR("Can't allocate lnet_hdr_t\n"); - return -ENOMEM; - } - - rc = libcfs_sock_read(sock, &hdr->src_nid, - sizeof (*hdr) - offsetof (lnet_hdr_t, src_nid), - timeout); - if (rc != 0) { - CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - goto out; - } - - /* ...and check we got what we expected */ - if (hdr->type != cpu_to_le32 (LNET_MSG_HELLO)) { - CERROR ("Expecting a HELLO hdr," - " but got type %d from %u.%u.%u.%u\n", - le32_to_cpu (hdr->type), - HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - goto out; - } - - hello->kshm_src_nid = 
le64_to_cpu (hdr->src_nid); - hello->kshm_src_pid = le32_to_cpu (hdr->src_pid); - hello->kshm_src_incarnation = le64_to_cpu (hdr->msg.hello.incarnation); - hello->kshm_ctype = le32_to_cpu (hdr->msg.hello.type); - hello->kshm_nips = le32_to_cpu (hdr->payload_length) / - sizeof (__u32); - - if (hello->kshm_nips > LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %u.%u.%u.%u\n", - hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - goto out; - } - - if (hello->kshm_nips == 0) - goto out; - - rc = libcfs_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { - CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - goto out; - } - - for (i = 0; i < hello->kshm_nips; i++) { - hello->kshm_ips[i] = __le32_to_cpu(hello->kshm_ips[i]); - - if (hello->kshm_ips[i] == 0) { - CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n", - i, HIPQUAD(conn->ksnc_ipaddr)); - rc = -EPROTO; - break; - } - } -out: - LIBCFS_FREE(hdr, sizeof(*hdr)); - - return rc; -} - -static int -ksocknal_recv_hello_v2 (ksock_conn_t *conn, ksock_hello_msg_t *hello, int timeout) -{ - cfs_socket_t *sock = conn->ksnc_sock; - int rc; - int i; - - if (hello->kshm_magic == LNET_PROTO_MAGIC) - conn->ksnc_flip = 0; - else - conn->ksnc_flip = 1; - - rc = libcfs_sock_read(sock, &hello->kshm_src_nid, - offsetof(ksock_hello_msg_t, kshm_ips) - - offsetof(ksock_hello_msg_t, kshm_src_nid), - timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } - - if (conn->ksnc_flip) { - __swab32s(&hello->kshm_src_pid); - __swab64s(&hello->kshm_src_nid); - __swab32s(&hello->kshm_dst_pid); - __swab64s(&hello->kshm_dst_nid); - __swab64s(&hello->kshm_src_incarnation); - __swab64s(&hello->kshm_dst_incarnation); - __swab32s(&hello->kshm_ctype); - __swab32s(&hello->kshm_nips); - } - - if (hello->kshm_nips > 
LNET_MAX_INTERFACES) { - CERROR("Bad nips %d from ip %u.%u.%u.%u\n", - hello->kshm_nips, HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - - if (hello->kshm_nips == 0) - return 0; - - rc = libcfs_sock_read(sock, hello->kshm_ips, - hello->kshm_nips * sizeof(__u32), timeout); - if (rc != 0) { - CERROR ("Error %d reading IPs from ip %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } - - for (i = 0; i < hello->kshm_nips; i++) { - if (conn->ksnc_flip) - __swab32s(&hello->kshm_ips[i]); - - if (hello->kshm_ips[i] == 0) { - CERROR("Zero IP[%d] from ip %u.%u.%u.%u\n", - i, HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - } - - return 0; -} - -static void -ksocknal_pack_msg_v1(ksock_tx_t *tx) -{ - /* V1.x has no KSOCK_MSG_NOOP */ - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - LASSERT(tx->tx_lnetmsg != NULL); - - tx->tx_iov[0].iov_base = (void *)&tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = sizeof(lnet_hdr_t); - - tx->tx_resid = tx->tx_nob = tx->tx_lnetmsg->msg_len + sizeof(lnet_hdr_t); -} - -static void -ksocknal_pack_msg_v2(ksock_tx_t *tx) -{ - tx->tx_iov[0].iov_base = (void *)&tx->tx_msg; - - if (tx->tx_lnetmsg != NULL) { - LASSERT(tx->tx_msg.ksm_type != KSOCK_MSG_NOOP); - - tx->tx_msg.ksm_u.lnetmsg.ksnm_hdr = tx->tx_lnetmsg->msg_hdr; - tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload); - tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_payload) + - tx->tx_lnetmsg->msg_len; - } else { - LASSERT(tx->tx_msg.ksm_type == KSOCK_MSG_NOOP); - - tx->tx_iov[0].iov_len = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - tx->tx_resid = tx->tx_nob = offsetof(ksock_msg_t, ksm_u.lnetmsg.ksnm_hdr); - } - /* Don't checksum before start sending, because packet can be piggybacked with ACK */ -} - -static void -ksocknal_unpack_msg_v1(ksock_msg_t *msg) -{ - msg->ksm_type = KSOCK_MSG_LNET; - msg->ksm_csum = 0; - msg->ksm_zc_req_cookie = 0; - msg->ksm_zc_ack_cookie = 0; -} - 
-static void -ksocknal_unpack_msg_v2(ksock_msg_t *msg) -{ - return; /* Do nothing */ -} - -ksock_protocol_t ksocknal_protocol_v1x = -{ - KSOCK_PROTO_V1, - ksocknal_send_hello_v1, - ksocknal_recv_hello_v1, - ksocknal_pack_msg_v1, - ksocknal_unpack_msg_v1 -}; - -ksock_protocol_t ksocknal_protocol_v2x = -{ - KSOCK_PROTO_V2, - ksocknal_send_hello_v2, - ksocknal_recv_hello_v2, - ksocknal_pack_msg_v2, - ksocknal_unpack_msg_v2 -}; - -int -ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, - lnet_nid_t peer_nid, ksock_hello_msg_t *hello) -{ - /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */ - ksock_net_t *net = (ksock_net_t *)ni->ni_data; - lnet_nid_t srcnid; - - LASSERT (0 <= hello->kshm_nips && hello->kshm_nips <= LNET_MAX_INTERFACES); - - /* No need for getconnsock/putconnsock */ - LASSERT (!conn->ksnc_closing); - LASSERT (conn->ksnc_proto != NULL); - - srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, peer_nid); - - hello->kshm_src_nid = srcnid; - hello->kshm_dst_nid = peer_nid; - hello->kshm_src_pid = the_lnet.ln_pid; - - hello->kshm_src_incarnation = net->ksnn_incarnation; - hello->kshm_ctype = conn->ksnc_type; - - return conn->ksnc_proto->pro_send_hello(conn, hello); -} - -int -ksocknal_invert_type(int type) -{ - switch (type) - { - case SOCKLND_CONN_ANY: - case SOCKLND_CONN_CONTROL: - return (type); - case SOCKLND_CONN_BULK_IN: - return SOCKLND_CONN_BULK_OUT; - case SOCKLND_CONN_BULK_OUT: - return SOCKLND_CONN_BULK_IN; - default: - return (SOCKLND_CONN_NONE); - } -} - -int -ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn, - ksock_hello_msg_t *hello, lnet_process_id_t *peerid, - __u64 *incarnation) -{ - cfs_socket_t *sock = conn->ksnc_sock; - int active; - int timeout; - int match = 0; - int rc; - ksock_protocol_t *proto; - lnet_process_id_t recv_id; - - active = (peerid->nid != LNET_NID_ANY); - timeout = active ? 
*ksocknal_tunables.ksnd_timeout : - lnet_acceptor_timeout(); - - rc = libcfs_sock_read(sock, &hello->kshm_magic, sizeof (hello->kshm_magic), timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } - - if (hello->kshm_magic != LNET_PROTO_MAGIC && - hello->kshm_magic != __swab32(LNET_PROTO_MAGIC) && - hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) { - /* Unexpected magic! */ - if (active || - the_lnet.ln_ptlcompat == 0) { - CERROR ("Bad magic(1) %#08x (%#08x expected) from " - "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic), - LNET_PROTO_TCP_MAGIC, - HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - - /* When portals compatibility is set, I may be passed a new - * connection "blindly" by the acceptor, and I have to - * determine if my peer has sent an acceptor connection request - * or not. This isn't a 'hello', so I'll get the acceptor to - * look at it... */ - rc = lnet_accept(ni, sock, hello->kshm_magic); - if (rc != 0) - return -EPROTO; - - /* ...and if it's OK I'm back to looking for a 'hello'... 
*/ - rc = libcfs_sock_read(sock, &hello->kshm_magic, - sizeof (hello->kshm_magic), timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } - - /* Only need to check V1.x magic */ - if (hello->kshm_magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) { - CERROR ("Bad magic(2) %#08x (%#08x expected) from " - "%u.%u.%u.%u\n", __cpu_to_le32 (hello->kshm_magic), - LNET_PROTO_TCP_MAGIC, - HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - } - - rc = libcfs_sock_read(sock, &hello->kshm_version, - sizeof(hello->kshm_version), timeout); - if (rc != 0) { - CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - LASSERT (rc < 0 && rc != -EALREADY); - return rc; - } - - proto = ksocknal_compat_protocol(hello); - if (proto == NULL) { - if (!active) { - /* unknown protocol from peer, tell peer my protocol */ - conn->ksnc_proto = &ksocknal_protocol_v2x; - hello->kshm_nips = 0; - ksocknal_send_hello(ni, conn, ni->ni_nid, hello); - } - - CERROR ("Unknown protocol version (%d.x expected)" - " from %u.%u.%u.%u\n", - conn->ksnc_proto->pro_version, - HIPQUAD(conn->ksnc_ipaddr)); - - return -EPROTO; - } - - if (conn->ksnc_proto == proto) - match = 1; - - conn->ksnc_proto = proto; - - /* receive the rest of hello message anyway */ - rc = conn->ksnc_proto->pro_recv_hello(conn, hello, timeout); - if (rc != 0) { - CERROR("Error %d reading or checking hello from from %u.%u.%u.%u\n", - rc, HIPQUAD(conn->ksnc_ipaddr)); - return rc; - } - - if (hello->kshm_src_nid == LNET_NID_ANY) { - CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY" - "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - - if (conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - /* Userspace NAL assigns peer process ID from socket */ - recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG; - recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), 
conn->ksnc_ipaddr); - } else { - recv_id.nid = hello->kshm_src_nid; - - if (the_lnet.ln_ptlcompat > 1 && /* portals peers may exist */ - LNET_NIDNET(recv_id.nid) == 0) /* this is one */ - recv_id.pid = the_lnet.ln_pid; /* give it a sensible pid */ - else - recv_id.pid = hello->kshm_src_pid; - - } - - if (!active) { /* don't know peer's nid yet */ - *peerid = recv_id; - } else if (peerid->pid != recv_id.pid || - !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) { - LCONSOLE_ERROR("Connected successfully to %s on host " - "%u.%u.%u.%u, but they claimed they were " - "%s; please check your Lustre " - "configuration.\n", - libcfs_id2str(*peerid), - HIPQUAD(conn->ksnc_ipaddr), - libcfs_id2str(recv_id)); - return -EPROTO; - } - - if (conn->ksnc_type == SOCKLND_CONN_NONE) { - /* I've accepted this connection; peer determines type */ - conn->ksnc_type = ksocknal_invert_type(hello->kshm_ctype); - if (conn->ksnc_type == SOCKLND_CONN_NONE) { - CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n", - hello->kshm_ctype, libcfs_id2str(*peerid), - HIPQUAD(conn->ksnc_ipaddr)); - return -EPROTO; - } - } else if (hello->kshm_ctype == SOCKLND_CONN_NONE) { - if (match) { - /* lost a connection race */ - return -EALREADY; - } - /* unmatched protocol get SOCKLND_CONN_NONE anyway */ - } else if (ksocknal_invert_type(hello->kshm_ctype) != conn->ksnc_type) { - CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n", - conn->ksnc_type, libcfs_id2str(*peerid), - HIPQUAD(conn->ksnc_ipaddr), - hello->kshm_ctype); - return -EPROTO; - } - - *incarnation = hello->kshm_src_incarnation; - - return 0; -} - -void -ksocknal_connect (ksock_route_t *route) -{ - CFS_LIST_HEAD (zombies); - ksock_peer_t *peer = route->ksnr_peer; - int type; - int wanted; - cfs_socket_t *sock; - cfs_time_t deadline; - int retry_later = 0; - int rc = 0; - - deadline = cfs_time_add(cfs_time_current(), - cfs_time_seconds(*ksocknal_tunables.ksnd_timeout)); - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - LASSERT 
(route->ksnr_scheduled); - LASSERT (!route->ksnr_connecting); - - route->ksnr_connecting = 1; - - for (;;) { - wanted = ksocknal_route_mask() & ~route->ksnr_connected; - - /* stop connecting if peer/route got closed under me, or - * route got connected while queued */ - if (peer->ksnp_closing || route->ksnr_deleted || - wanted == 0) { - retry_later = 0; - break; - } - - /* reschedule if peer is connecting to me */ - if (peer->ksnp_accepting > 0) { - CDEBUG(D_NET, - "peer %s(%d) already connecting to me, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid), peer->ksnp_accepting); - retry_later = 1; - } - - if (retry_later) /* needs reschedule */ - break; - - if ((wanted & (1 << SOCKLND_CONN_ANY)) != 0) { - type = SOCKLND_CONN_ANY; - } else if ((wanted & (1 << SOCKLND_CONN_CONTROL)) != 0) { - type = SOCKLND_CONN_CONTROL; - } else if ((wanted & (1 << SOCKLND_CONN_BULK_IN)) != 0) { - type = SOCKLND_CONN_BULK_IN; - } else { - LASSERT ((wanted & (1 << SOCKLND_CONN_BULK_OUT)) != 0); - type = SOCKLND_CONN_BULK_OUT; - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - if (cfs_time_aftereq(cfs_time_current(), deadline)) { - rc = -ETIMEDOUT; - lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); - goto failed; - } - - rc = lnet_connect(&sock, peer->ksnp_id.nid, - route->ksnr_myipaddr, - route->ksnr_ipaddr, route->ksnr_port); - if (rc != 0) - goto failed; - - rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type); - - if (rc < 0) { - lnet_connect_console_error(rc, peer->ksnp_id.nid, - route->ksnr_ipaddr, - route->ksnr_port); - goto failed; - } - - /* rc == EALREADY means I lost a connection race and my - * peer is connecting to me. - * rc == EPROTO means my peer is speaking an older - * protocol version. 
*/ - LASSERT (rc == 0 || rc == EALREADY || rc == EPROTO); - - retry_later = rc != 0; - if (retry_later) - CDEBUG(D_NET, "peer %s: conn race, retry later.\n", - libcfs_nid2str(peer->ksnp_id.nid)); - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - } - - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - - if (retry_later) { - /* re-queue for attention; this frees me up to handle - * the peer's incoming connection request */ - ksocknal_launch_connection_locked(route); - } - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - return; - - failed: - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - route->ksnr_scheduled = 0; - route->ksnr_connecting = 0; - - /* This is a retry rather than a new connection */ - route->ksnr_retry_interval *= 2; - route->ksnr_retry_interval = - MAX(route->ksnr_retry_interval, - cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000); - route->ksnr_retry_interval = - MIN(route->ksnr_retry_interval, - cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms)/1000); - - LASSERT (route->ksnr_retry_interval != 0); - route->ksnr_timeout = cfs_time_add(cfs_time_current(), - route->ksnr_retry_interval); - - if (!list_empty(&peer->ksnp_tx_queue) && - peer->ksnp_accepting == 0 && - ksocknal_find_connecting_route_locked(peer) == NULL) { - /* ksnp_tx_queue is queued on a conn on successful - * connection */ - LASSERT (list_empty (&peer->ksnp_conns)); - - /* take all the blocked packets while I've got the lock and - * complete below... 
*/ - list_add(&zombies, &peer->ksnp_tx_queue); - list_del_init(&peer->ksnp_tx_queue); - } - -#if 0 /* irrelevent with only eager routes */ - if (!route->ksnr_deleted) { - /* make this route least-favourite for re-selection */ - list_del(&route->ksnr_list); - list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - } -#endif - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - ksocknal_peer_failed(peer); - ksocknal_txlist_done(peer->ksnp_ni, &zombies, 1); -} - -static inline int -ksocknal_connd_connect_route_locked(void) -{ - /* Only handle an outgoing connection request if there is someone left - * to handle incoming connections */ - return !list_empty(&ksocknal_data.ksnd_connd_routes) && - ((ksocknal_data.ksnd_connd_connecting + 1) < - *ksocknal_tunables.ksnd_nconnds); -} - -static inline int -ksocknal_connd_ready(void) -{ - int rc; - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - - rc = ksocknal_data.ksnd_shuttingdown || - !list_empty(&ksocknal_data.ksnd_connd_connreqs) || - ksocknal_connd_connect_route_locked(); - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - return rc; -} - -int -ksocknal_connd (void *arg) -{ - long id = (long)arg; - char name[16]; - ksock_connreq_t *cr; - ksock_route_t *route; - - snprintf (name, sizeof (name), "socknal_cd%02ld", id); - cfs_daemonize (name); - cfs_block_allsigs (); - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - - if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) { - /* Connection accepted by the listener */ - cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next, - ksock_connreq_t, ksncr_list); - - list_del(&cr->ksncr_list); - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - ksocknal_create_conn(cr->ksncr_ni, NULL, - cr->ksncr_sock, SOCKLND_CONN_NONE); - lnet_ni_decref(cr->ksncr_ni); - LIBCFS_FREE(cr, sizeof(*cr)); - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - } - - if (ksocknal_connd_connect_route_locked()) { - /* Connection request */ - route 
= list_entry (ksocknal_data.ksnd_connd_routes.next, - ksock_route_t, ksnr_connd_list); - - list_del (&route->ksnr_connd_list); - ksocknal_data.ksnd_connd_connecting++; - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - ksocknal_connect (route); - ksocknal_route_decref(route); - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - ksocknal_data.ksnd_connd_connecting--; - } - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - wait_event_interruptible_exclusive( - ksocknal_data.ksnd_connd_waitq, - ksocknal_connd_ready()); - - spin_lock_bh (&ksocknal_data.ksnd_connd_lock); - } - - spin_unlock_bh (&ksocknal_data.ksnd_connd_lock); - - ksocknal_thread_fini (); - return (0); -} - -ksock_conn_t * -ksocknal_find_timed_out_conn (ksock_peer_t *peer) -{ - /* We're called with a shared lock on ksnd_global_lock */ - ksock_conn_t *conn; - struct list_head *ctmp; - - list_for_each (ctmp, &peer->ksnp_conns) { - int error; - conn = list_entry (ctmp, ksock_conn_t, ksnc_list); - - /* Don't need the {get,put}connsock dance to deref ksnc_sock */ - LASSERT (!conn->ksnc_closing); - - /* SOCK_ERROR will reset error code of socket in - * some platform (like Darwin8.x) */ - error = SOCK_ERROR(conn->ksnc_sock); - if (error != 0) { - ksocknal_conn_addref(conn); - - switch (error) { - case ECONNRESET: - CDEBUG(D_NETERROR, "A connection with %s " - "(%u.%u.%u.%u:%d) was reset; " - "it may have rebooted.\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - break; - case ETIMEDOUT: - CDEBUG(D_NETERROR, "A connection with %s " - "(%u.%u.%u.%u:%d) timed out; the " - "network or node may be down.\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - break; - default: - CDEBUG(D_NETERROR, "An unexpected network error %d " - "occurred with %s " - "(%u.%u.%u.%u:%d\n", error, - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - break; - } - - return (conn); - } - - if (conn->ksnc_rx_started && - 
cfs_time_aftereq(cfs_time_current(), - conn->ksnc_rx_deadline)) { - /* Timed out incomplete incoming message */ - ksocknal_conn_addref(conn); - CDEBUG(D_NETERROR, "Timeout receiving from %s " - "(%u.%u.%u.%u:%d), state %d wanted %d left %d\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port, - conn->ksnc_rx_state, - conn->ksnc_rx_nob_wanted, - conn->ksnc_rx_nob_left); - return (conn); - } - - if ((!list_empty(&conn->ksnc_tx_queue) || - SOCK_WMEM_QUEUED(conn->ksnc_sock) != 0) && - cfs_time_aftereq(cfs_time_current(), - conn->ksnc_tx_deadline)) { - /* Timed out messages queued for sending or - * buffered in the socket's send buffer */ - ksocknal_conn_addref(conn); - CDEBUG(D_NETERROR, "Timeout sending data to %s " - "(%u.%u.%u.%u:%d) the network or that " - "node may be down.\n", - libcfs_id2str(peer->ksnp_id), - HIPQUAD(conn->ksnc_ipaddr), - conn->ksnc_port); - return (conn); - } - } - - return (NULL); -} - -void -ksocknal_check_peer_timeouts (int idx) -{ - struct list_head *peers = &ksocknal_data.ksnd_peers[idx]; - struct list_head *ptmp; - ksock_peer_t *peer; - ksock_conn_t *conn; - - again: - /* NB. We expect to have a look at all the peers and not find any - * connections to time out, so we just use a shared lock while we - * take a look... */ - read_lock (&ksocknal_data.ksnd_global_lock); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, ksock_peer_t, ksnp_list); - conn = ksocknal_find_timed_out_conn (peer); - - if (conn != NULL) { - read_unlock (&ksocknal_data.ksnd_global_lock); - - ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT); - - /* NB we won't find this one again, but we can't - * just proceed with the next peer, since we dropped - * ksnd_global_lock and it might be dead already! 
*/ - ksocknal_conn_decref(conn); - goto again; - } - } - - read_unlock (&ksocknal_data.ksnd_global_lock); -} - -int -ksocknal_reaper (void *arg) -{ - cfs_waitlink_t wait; - ksock_conn_t *conn; - ksock_sched_t *sched; - struct list_head enomem_conns; - int nenomem_conns; - cfs_duration_t timeout; - int i; - int peer_index = 0; - cfs_time_t deadline = cfs_time_current(); - - cfs_daemonize ("socknal_reaper"); - cfs_block_allsigs (); - - CFS_INIT_LIST_HEAD(&enomem_conns); - cfs_waitlink_init (&wait); - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - - while (!ksocknal_data.ksnd_shuttingdown) { - - if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) { - conn = list_entry (ksocknal_data.ksnd_deathrow_conns.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - - ksocknal_terminate_conn (conn); - ksocknal_conn_decref(conn); - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - continue; - } - - if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) { - conn = list_entry (ksocknal_data.ksnd_zombie_conns.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - - ksocknal_destroy_conn (conn); - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - continue; - } - - if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) { - list_add(&enomem_conns, &ksocknal_data.ksnd_enomem_conns); - list_del_init(&ksocknal_data.ksnd_enomem_conns); - } - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - - /* reschedule all the connections that stalled with ENOMEM... 
*/ - nenomem_conns = 0; - while (!list_empty (&enomem_conns)) { - conn = list_entry (enomem_conns.next, - ksock_conn_t, ksnc_tx_list); - list_del (&conn->ksnc_tx_list); - - sched = conn->ksnc_scheduler; - - spin_lock_bh (&sched->kss_lock); - - LASSERT (conn->ksnc_tx_scheduled); - conn->ksnc_tx_ready = 1; - list_add_tail(&conn->ksnc_tx_list, &sched->kss_tx_conns); - cfs_waitq_signal (&sched->kss_waitq); - - spin_unlock_bh (&sched->kss_lock); - nenomem_conns++; - } - - /* careful with the jiffy wrap... */ - while ((timeout = cfs_time_sub(deadline, - cfs_time_current())) <= 0) { - const int n = 4; - const int p = 1; - int chunk = ksocknal_data.ksnd_peer_hash_size; - - /* Time to check for timeouts on a few more peers: I do - * checks every 'p' seconds on a proportion of the peer - * table and I need to check every connection 'n' times - * within a timeout interval, to ensure I detect a - * timeout on any connection within (n+1)/n times the - * timeout interval. */ - - if (*ksocknal_tunables.ksnd_timeout > n * p) - chunk = (chunk * n * p) / - *ksocknal_tunables.ksnd_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - ksocknal_check_peer_timeouts (peer_index); - peer_index = (peer_index + 1) % - ksocknal_data.ksnd_peer_hash_size; - } - - deadline = cfs_time_add(deadline, cfs_time_seconds(p)); - } - - if (nenomem_conns != 0) { - /* Reduce my timeout if I rescheduled ENOMEM conns. - * This also prevents me getting woken immediately - * if any go back on my enomem list. 
*/ - timeout = SOCKNAL_ENOMEM_RETRY; - } - ksocknal_data.ksnd_reaper_waketime = - cfs_time_add(cfs_time_current(), timeout); - - set_current_state (TASK_INTERRUPTIBLE); - cfs_waitq_add (&ksocknal_data.ksnd_reaper_waitq, &wait); - - if (!ksocknal_data.ksnd_shuttingdown && - list_empty (&ksocknal_data.ksnd_deathrow_conns) && - list_empty (&ksocknal_data.ksnd_zombie_conns)) - cfs_waitq_timedwait (&wait, CFS_TASK_INTERRUPTIBLE, timeout); - - set_current_state (TASK_RUNNING); - cfs_waitq_del (&ksocknal_data.ksnd_reaper_waitq, &wait); - - spin_lock_bh (&ksocknal_data.ksnd_reaper_lock); - } - - spin_unlock_bh (&ksocknal_data.ksnd_reaper_lock); - - ksocknal_thread_fini (); - return (0); -} diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.c b/lnet/klnds/socklnd/socklnd_lib-darwin.c deleted file mode 100644 index 25d6b453197e877ee1662d85127a7636c531c333..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.c +++ /dev/null @@ -1,1072 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ -#include <mach/mach_types.h> -#include <string.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <sys/file.h> - -#include "socklnd.h" - -# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - -SYSCTL_DECL(_lnet); - -SYSCTL_NODE (_lnet, OID_AUTO, ksocknal, CTLFLAG_RW, - 0, "ksocknal_sysctl"); - -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, timeout, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_timeout, - 0, "timeout"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, credits, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_credits, - 0, "credits"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, peer_credits, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peercredits, - 0, "peer_credits"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nconnds, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nconnds, - 0, "nconnds"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_reconnectms, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_reconnectms, - 0, "min_reconnectms"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, max_reconnectms, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_max_reconnectms, - 0, "max_reconnectms"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, eager_ack, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack, - 0, "eager_ack"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, typed, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns, - 0, "typed"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_bulk, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk, - 0, "min_bulk"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, rx_buffer_size, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_rx_buffer_size, - 0, "rx_buffer_size"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, tx_buffer_size, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_tx_buffer_size, - 0, "tx_buffer_size"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nagle, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle, - 0, "nagle"); 
-SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_idle, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_idle, - 0, "keepalive_idle"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_count, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_count, - 0, "keepalive_count"); -SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_intvl, - CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_intvl, - 0, "keepalive_intvl"); - -cfs_sysctl_table_t ksocknal_top_ctl_table [] = { - &sysctl__lnet_ksocknal, - &sysctl__lnet_ksocknal_timeout, - &sysctl__lnet_ksocknal_credits, - &sysctl__lnet_ksocknal_peer_credits, - &sysctl__lnet_ksocknal_nconnds, - &sysctl__lnet_ksocknal_min_reconnectms, - &sysctl__lnet_ksocknal_max_reconnectms, - &sysctl__lnet_ksocknal_eager_ack, - &sysctl__lnet_ksocknal_typed, - &sysctl__lnet_ksocknal_min_bulk, - &sysctl__lnet_ksocknal_rx_buffer_size, - &sysctl__lnet_ksocknal_tx_buffer_size, - &sysctl__lnet_ksocknal_nagle, - &sysctl__lnet_ksocknal_keepalive_idle, - &sysctl__lnet_ksocknal_keepalive_count, - &sysctl__lnet_ksocknal_keepalive_intvl, - NULL -}; - -int -ksocknal_lib_tunables_init () -{ - ksocknal_tunables.ksnd_sysctl = - cfs_register_sysctl_table (ksocknal_top_ctl_table, 0); - - if (ksocknal_tunables.ksnd_sysctl == NULL) - return -ENOMEM; - - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ - if (ksocknal_tunables.ksnd_sysctl != NULL) - cfs_unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl); -} -#else -int -ksocknal_lib_tunables_init () -{ - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ -} -#endif - -/* - * To use bigger buffer for socket: - * 1. Increase nmbclusters (Cannot increased by sysctl because it's ready only, so - * we must patch kernel). - * 2. Increase net.inet.tcp.reass.maxsegments - * 3. Increase net.inet.tcp.sendspace - * 4. Increase net.inet.tcp.recvspace - * 5. 
Increase kern.ipc.maxsockbuf - */ -#define KSOCKNAL_MAX_BUFFER (1152*1024) - -void -ksocknal_lib_bind_irq (unsigned int irq) -{ - return; -} - -unsigned int -ksocknal_lib_sock_irq (cfs_socket_t *sock) -{ - return 0; -} - -int -ksocknal_lib_get_conn_addrs (ksock_conn_t *conn) -{ - int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1, - &conn->ksnc_ipaddr, - &conn->ksnc_port); - - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */ - LASSERT (!conn->ksnc_closing); - - if (rc != 0) { - CERROR ("Error %d getting sock peer IP\n", rc); - return rc; - } - - rc = libcfs_sock_getaddr(conn->ksnc_sock, 0, - &conn->ksnc_myipaddr, NULL); - if (rc != 0) { - CERROR ("Error %d getting sock local IP\n", rc); - return rc; - } - - return 0; -} - -#ifdef __DARWIN8__ - -int -ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - socket_t sock = C2B_SOCK(conn->ksnc_sock); - size_t sndlen; - int nob; - int rc; - -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_niov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = tx->tx_iov[i]; - nob += scratchiov[i].iov_len; - } - - /* - * XXX Liang: - * Linux has MSG_MORE, do we have anything to - * reduce number of partial TCP segments sent? 
- */ - rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen); - if (rc == 0) - rc = sndlen; - return rc; -} - -int -ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - socket_t sock = C2B_SOCK(conn->ksnc_sock); - lnet_kiov_t *kiov = tx->tx_kiov; - int rc; - int nob; - size_t sndlen; - -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_nkiov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - - /* - * XXX Liang: - * Linux has MSG_MORE, do wen have anyting to - * reduce number of partial TCP segments sent? 
- */ - rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen); - for (i = 0; i < niov; i++) - cfs_kunmap(kiov[i].kiov_page); - if (rc == 0) - rc = sndlen; - return rc; -} - -int -ksocknal_lib_recv_iov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_niov; -#endif - struct iovec *iov = conn->ksnc_rx_iov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - size_t rcvlen; - int nob; - int i; - int rc; - - LASSERT (niov > 0); - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = iov[i]; - nob += scratchiov[i].iov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen); - if (rc == 0) - rc = rcvlen; - - return rc; -} - -int -ksocknal_lib_recv_kiov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_nkiov; -#endif - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - int nob; - int i; - size_t rcvlen; - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + \ - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen); - for (i = 0; i < niov; i++) - cfs_kunmap(kiov[i].kiov_page); - if (rc == 0) - rc = rcvlen; - return (rc); -} - -void -ksocknal_lib_eager_ack (ksock_conn_t *conn) -{ - /* XXX Liang: */ -} - -int -ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) -{ - socket_t sock = C2B_SOCK(conn->ksnc_sock); - int len; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return (-ESHUTDOWN); - } - rc = libcfs_sock_getbuf(conn->ksnc_sock, txmem, rxmem); - if (rc == 0) { - len = sizeof(*nagle); - rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - nagle, &len); - } - ksocknal_connsock_decref(conn); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - return (rc); -} - -int -ksocknal_lib_setup_sock (cfs_socket_t *sock) -{ - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - socket_t so = C2B_SOCK(sock); - struct linger linger; - - /* Ensure this socket aborts active sends immediately when we close - * it. 
*/ - linger.l_onoff = 0; - linger.l_linger = 0; - rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger)); - if (rc != 0) { - CERROR ("Can't set SO_LINGER: %d\n", rc); - return (rc); - } - - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; - rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - } - - rc = libcfs_sock_setbuf(sock, - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size); - if (rc != 0) { - CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); - return (rc); - } - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - option = (do_keepalive ? 
1 : 0); - - rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option)); - if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - return (rc); - } - - if (!do_keepalive) - return (rc); - rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE, - &keep_idle, sizeof(keep_idle)); - - return (rc); -} - -void -ksocknal_lib_push_conn(ksock_conn_t *conn) -{ - socket_t sock; - int val = 1; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) /* being shut down */ - return; - sock = C2B_SOCK(conn->ksnc_sock); - - rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)); - LASSERT(rc == 0); - - ksocknal_connsock_decref(conn); - return; -} - -extern void ksocknal_read_callback (ksock_conn_t *conn); -extern void ksocknal_write_callback (ksock_conn_t *conn); - -static void -ksocknal_upcall(socket_t so, void *arg, int waitf) -{ - ksock_conn_t *conn = (ksock_conn_t *)arg; - ENTRY; - - read_lock (&ksocknal_data.ksnd_global_lock); - if (conn == NULL) - goto out; - - ksocknal_read_callback (conn); - /* XXX Liang */ - ksocknal_write_callback (conn); -out: - read_unlock (&ksocknal_data.ksnd_global_lock); - EXIT; -} - -void -ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn) -{ - /* No callback need to save in osx */ - return; -} - -void -ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn) -{ - libcfs_sock_set_cb(sock, ksocknal_upcall, (void *)conn); - return; -} - -void -ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn) -{ - libcfs_sock_reset_cb(sock); -} - -#else /* !__DARWIN8__ */ - -int -ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_niov; -#endif - struct socket *sock = conn->ksnc_sock; - int nob; - int rc; - int i; - struct uio suio = { - .uio_iov = scratchiov, - 
.uio_iovcnt = niov, - .uio_offset = 0, - .uio_resid = 0, /* This will be valued after a while */ - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_WRITE, - .uio_procp = NULL - }; - int flags = MSG_DONTWAIT; - CFS_DECL_NET_DATA; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = tx->tx_iov[i]; - nob += scratchiov[i].iov_len; - } - suio.uio_resid = nob; - - CFS_NET_IN; - rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags); - CFS_NET_EX; - - /* NB there is no return value can indicate how many - * have been sent and how many resid, we have to get - * sent bytes from suio. */ - if (rc != 0) { - if (suio.uio_resid != nob &&\ - (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) - /* We have sent something */ - rc = nob - suio.uio_resid; - else if ( rc == EWOULDBLOCK ) - /* Actually, EAGAIN and EWOULDBLOCK have same value in OSX */ - rc = -EAGAIN; - else - rc = -rc; - } else /* rc == 0 */ - rc = nob - suio.uio_resid; - - return rc; -} - -int -ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ -#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_nkiov; -#endif - struct socket *sock = conn->ksnc_sock; - lnet_kiov_t *kiov = tx->tx_kiov; - int nob; - int rc; - int i; - struct uio suio = { - .uio_iov = scratchiov, - .uio_iovcnt = niov, - .uio_offset = 0, - .uio_resid = 0, /* It should be valued after a while */ - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_WRITE, - .uio_procp = NULL - }; - int flags = MSG_DONTWAIT; - CFS_DECL_NET_DATA; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - suio.uio_resid = nob; - - CFS_NET_IN; - rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags); - CFS_NET_EX; - - for (i = 0; i < 
niov; i++) - cfs_kunmap(kiov[i].kiov_page); - - if (rc != 0) { - if (suio.uio_resid != nob &&\ - (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) - /* We have sent something */ - rc = nob - suio.uio_resid; - else if ( rc == EWOULDBLOCK ) - /* EAGAIN and EWOULD BLOCK have same value in OSX */ - rc = -EAGAIN; - else - rc = -rc; - } else /* rc == 0 */ - rc = nob - suio.uio_resid; - - return rc; -} - -/* - * liang: Hack of inpcb and tcpcb. - * To get tcpcb of a socket, and call tcp_output - * to send quick ack. - */ -struct ks_tseg_qent{ - int foo; -}; - -struct ks_tcptemp{ - int foo; -}; - -LIST_HEAD(ks_tsegqe_head, ks_tseg_qent); - -struct ks_tcpcb { - struct ks_tsegqe_head t_segq; - int t_dupacks; - struct ks_tcptemp *unused; - int t_timer[4]; - struct inpcb *t_inpcb; - int t_state; - u_int t_flags; - /* - * There are more fields but we dont need - * ...... - */ -}; - -#define TF_ACKNOW 0x00001 -#define TF_DELACK 0x00002 - -struct ks_inpcb { - LIST_ENTRY(ks_inpcb) inp_hash; - struct in_addr reserved1; - struct in_addr reserved2; - u_short inp_fport; - u_short inp_lport; - LIST_ENTRY(inpcb) inp_list; - caddr_t inp_ppcb; - /* - * There are more fields but we dont need - * ...... - */ -}; - -#define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb) -#define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb) -#define ks_sototcpcb(so) (intotcpcb(sotoinpcb(so))) - -void -ksocknal_lib_eager_ack (ksock_conn_t *conn) -{ - struct socket *sock = conn->ksnc_sock; - struct ks_inpcb *inp = ks_sotoinpcb(sock); - struct ks_tcpcb *tp = ks_intotcpcb(inp); - int s; - CFS_DECL_NET_DATA; - - extern int tcp_output(register struct ks_tcpcb *tp); - - CFS_NET_IN; - s = splnet(); - - /* - * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo - * to send immediate ACK. 
- */ - if (tp && tp->t_flags & TF_DELACK){ - tp->t_flags &= ~TF_DELACK; - tp->t_flags |= TF_ACKNOW; - (void) tcp_output(tp); - } - splx(s); - - CFS_NET_EX; - - return; -} - -int -ksocknal_lib_recv_iov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_niov; -#endif - struct iovec *iov = conn->ksnc_rx_iov; - int nob; - int rc; - int i; - struct uio ruio = { - .uio_iov = scratchiov, - .uio_iovcnt = niov, - .uio_offset = 0, - .uio_resid = 0, /* It should be valued after a while */ - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_READ, - .uio_procp = NULL - }; - int flags = MSG_DONTWAIT; - CFS_DECL_NET_DATA; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = iov[i]; - nob += scratchiov[i].iov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - ruio.uio_resid = nob; - - CFS_NET_IN; - rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags); - CFS_NET_EX; - if (rc){ - if (ruio.uio_resid != nob && \ - (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN)) - /* data particially received */ - rc = nob - ruio.uio_resid; - else if (rc == EWOULDBLOCK) - /* EAGAIN and EWOULD BLOCK have same value in OSX */ - rc = -EAGAIN; - else - rc = -rc; - } else - rc = nob - ruio.uio_resid; - - return (rc); -} - -int -ksocknal_lib_recv_kiov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_nkiov; -#endif - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - int nob; - int rc; - int i; - struct uio ruio = { - .uio_iov = scratchiov, - .uio_iovcnt = niov, - .uio_offset = 0, - .uio_resid = 0, - .uio_segflg = UIO_SYSSPACE, - 
.uio_rw = UIO_READ, - .uio_procp = NULL - }; - int flags = MSG_DONTWAIT; - CFS_DECL_NET_DATA; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - ruio.uio_resid = nob; - - CFS_NET_IN; - rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags); - CFS_NET_EX; - - for (i = 0; i < niov; i++) - cfs_kunmap(kiov[i].kiov_page); - - if (rc){ - if (ruio.uio_resid != nob && \ - (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) - /* data particially received */ - rc = nob - ruio.uio_resid; - else if (rc == EWOULDBLOCK) - /* receive blocked, EWOULDBLOCK == EAGAIN */ - rc = -EAGAIN; - else - rc = -rc; - } else - rc = nob - ruio.uio_resid; - - return (rc); -} - -int -ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) -{ - struct socket *sock = conn->ksnc_sock; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return -ESHUTDOWN; - } - rc = libcfs_sock_getbuf(sock, txmem, rxmem); - if (rc == 0) { - struct sockopt sopt; - int len; - CFS_DECL_NET_DATA; - - len = sizeof(*nagle); - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_GET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = nagle; - sopt.sopt_valsize = len; - - CFS_NET_IN; - rc = -sogetopt(sock, &sopt); - CFS_NET_EX; - } - - ksocknal_connsock_decref(conn); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - return (rc); -} - -int -ksocknal_lib_setup_sock (struct socket *so) -{ - struct sockopt sopt; - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - struct linger linger; - CFS_DECL_NET_DATA; - - rc = libcfs_sock_setbuf(so, - *ksocknal_tunables.ksnd_tx_buffer_size, - 
*ksocknal_tunables.ksnd_rx_buffer_size); - if (rc != 0) { - CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); - return (rc); - } - - /* Ensure this socket aborts active sends immediately when we close - * it. */ - bzero(&sopt, sizeof sopt); - - linger.l_onoff = 0; - linger.l_linger = 0; - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_LINGER; - sopt.sopt_val = &linger; - sopt.sopt_valsize = sizeof(linger); - - CFS_NET_IN; - rc = -sosetopt(so, &sopt); - if (rc != 0) { - CERROR ("Can't set SO_LINGER: %d\n", rc); - goto out; - } - - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &option; - sopt.sopt_valsize = sizeof(option); - rc = -sosetopt(so, &sopt); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - goto out; - } - } - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - option = (do_keepalive ? 
1 : 0); - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_KEEPALIVE; - sopt.sopt_val = &option; - sopt.sopt_valsize = sizeof(option); - rc = -sosetopt(so, &sopt); - if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - goto out; - } - - if (!do_keepalive) { - /* no more setting, just return */ - rc = 0; - goto out; - } - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_KEEPALIVE; - sopt.sopt_val = &keep_idle; - sopt.sopt_valsize = sizeof(keep_idle); - rc = -sosetopt(so, &sopt); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc); - goto out; - } -out: - CFS_NET_EX; - return (rc); -} - -void -ksocknal_lib_push_conn(ksock_conn_t *conn) -{ - struct socket *sock; - struct sockopt sopt; - int val = 1; - int rc; - CFS_DECL_NET_DATA; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) /* being shut down */ - return; - sock = conn->ksnc_sock; - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = IPPROTO_TCP; - sopt.sopt_name = TCP_NODELAY; - sopt.sopt_val = &val; - sopt.sopt_valsize = sizeof val; - - CFS_NET_IN; - sosetopt(sock, &sopt); - CFS_NET_EX; - - ksocknal_connsock_decref(conn); - return; -} - - -extern void ksocknal_read_callback (ksock_conn_t *conn); -extern void ksocknal_write_callback (ksock_conn_t *conn); - -static void -ksocknal_upcall(struct socket *so, caddr_t arg, int waitf) -{ - ksock_conn_t *conn = (ksock_conn_t *)arg; - ENTRY; - - read_lock (&ksocknal_data.ksnd_global_lock); - if (conn == NULL) - goto out; - - if (so->so_rcv.sb_flags & SB_UPCALL) { - extern int soreadable(struct socket *so); - if (conn->ksnc_rx_nob_wanted && soreadable(so)) - /* To verify whether the upcall is for receive */ - ksocknal_read_callback (conn); - } - /* go foward? 
*/ - if (so->so_snd.sb_flags & SB_UPCALL){ - extern int sowriteable(struct socket *so); - if (sowriteable(so)) - /* socket is writable */ - ksocknal_write_callback(conn); - } -out: - read_unlock (&ksocknal_data.ksnd_global_lock); - - EXIT; -} - -void -ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) -{ - /* No callback need to save in osx */ - return; -} - -void -ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) -{ - CFS_DECL_NET_DATA; - - CFS_NET_IN; - sock->so_upcallarg = (void *)conn; - sock->so_upcall = ksocknal_upcall; - sock->so_snd.sb_timeo = 0; - sock->so_rcv.sb_timeo = cfs_time_seconds(2); - sock->so_rcv.sb_flags |= SB_UPCALL; - sock->so_snd.sb_flags |= SB_UPCALL; - CFS_NET_EX; - return; -} - -void -ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn) -{ - CFS_DECL_NET_DATA; - - CFS_NET_IN; - ksocknal_upcall (sock, (void *)conn, 0); - CFS_NET_EX; -} - -void -ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) -{ - CFS_DECL_NET_DATA; - - CFS_NET_IN; - sock->so_rcv.sb_flags &= ~SB_UPCALL; - sock->so_snd.sb_flags &= ~SB_UPCALL; - sock->so_upcall = NULL; - sock->so_upcallarg = NULL; - CFS_NET_EX; -} - -#endif /* !__DARWIN8__ */ diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.h b/lnet/klnds/socklnd/socklnd_lib-darwin.h deleted file mode 100644 index 9e7574ac807c194eb9ff5b62a9328c1ae8f274d8..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-darwin.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __XNU_SOCKNAL_LIB_H__ -#define __XNU_SOCKNAL_LIB_H__ - -#include <sys/kernel.h> -#include <sys/file.h> -#include <sys/filedesc.h> -#include <sys/stat.h> -#include <sys/vnode.h> -#include <sys/mount.h> -#include <sys/proc.h> -#include <sys/sysctl.h> -#include <sys/ubc.h> -#include <sys/uio.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/domain.h> -#include <sys/protosw.h> -#include <sys/namei.h> -#include 
<sys/fcntl.h> -#include <sys/lockf.h> -#include <sys/syslog.h> -#include <machine/spl.h> -#include <mach/mach_types.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <stdarg.h> - -#include <libcfs/libcfs.h> - -static inline -int ksocknal_nsched(void) -{ - /* XXX Liang: fix it */ - return 1; -} - -#endif diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.c b/lnet/klnds/socklnd/socklnd_lib-linux.c deleted file mode 100644 index b7e2f49eb2c0f3d4efde9d6754191c65d79f85a4..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-linux.c +++ /dev/null @@ -1,879 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#include "socklnd.h" - -# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -static ctl_table ksocknal_ctl_table[21]; - -ctl_table ksocknal_top_ctl_table[] = { - {200, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, - { 0 } -}; - -int -ksocknal_lib_tunables_init () -{ - int i = 0; - int j = 1; - - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "timeout", ksocknal_tunables.ksnd_timeout, - sizeof (int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "credits", ksocknal_tunables.ksnd_credits, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "peer_credits", ksocknal_tunables.ksnd_peercredits, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "nconnds", ksocknal_tunables.ksnd_nconnds, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "min_reconnectms", ksocknal_tunables.ksnd_min_reconnectms, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "max_reconnectms", ksocknal_tunables.ksnd_max_reconnectms, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "eager_ack", ksocknal_tunables.ksnd_eager_ack, - sizeof (int), 0644, NULL, 
&proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "zero_copy", ksocknal_tunables.ksnd_zc_min_frag, - sizeof (int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "typed", ksocknal_tunables.ksnd_typed_conns, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "min_bulk", ksocknal_tunables.ksnd_min_bulk, - sizeof (int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "rx_buffer_size", ksocknal_tunables.ksnd_rx_buffer_size, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "tx_buffer_size", ksocknal_tunables.ksnd_tx_buffer_size, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "nagle", ksocknal_tunables.ksnd_nagle, - sizeof(int), 0644, NULL, &proc_dointvec}; -#if CPU_AFFINITY - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "irq_affinity", ksocknal_tunables.ksnd_irq_affinity, - sizeof(int), 0644, NULL, &proc_dointvec}; -#endif - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_idle", ksocknal_tunables.ksnd_keepalive_idle, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_count", ksocknal_tunables.ksnd_keepalive_count, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_intvl", ksocknal_tunables.ksnd_keepalive_intvl, - sizeof(int), 0644, NULL, &proc_dointvec}; -#ifdef SOCKNAL_BACKOFF - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "backoff_init", ksocknal_tunables.ksnd_backoff_init, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "backoff_max", ksocknal_tunables.ksnd_backoff_max, - sizeof(int), 0644, NULL, &proc_dointvec}; -#endif - - LASSERT (j == i+1); - LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0])); - - ksocknal_tunables.ksnd_sysctl = - register_sysctl_table(ksocknal_top_ctl_table, 0); - - if 
(ksocknal_tunables.ksnd_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ - if (ksocknal_tunables.ksnd_sysctl != NULL) - unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl); -} -#else -int -ksocknal_lib_tunables_init () -{ - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ -} -#endif /* # if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM */ - -void -ksocknal_lib_bind_irq (unsigned int irq) -{ -#if (defined(CONFIG_SMP) && CPU_AFFINITY) - int bind; - int cpu; - char cmdline[64]; - ksock_irqinfo_t *info; - char *argv[] = {"/bin/sh", - "-c", - cmdline, - NULL}; - char *envp[] = {"HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - - LASSERT (irq < NR_IRQS); - if (irq == 0) /* software NIC or affinity disabled */ - return; - - info = &ksocknal_data.ksnd_irqinfo[irq]; - - write_lock_bh (&ksocknal_data.ksnd_global_lock); - - LASSERT (info->ksni_valid); - bind = !info->ksni_bound; - info->ksni_bound = 1; - - write_unlock_bh (&ksocknal_data.ksnd_global_lock); - - if (!bind) /* bound already */ - return; - - cpu = ksocknal_irqsched2cpu(info->ksni_sched); - snprintf (cmdline, sizeof (cmdline), - "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq); - - LCONSOLE_INFO("Binding irq %u to CPU %d with cmd: %s\n", - irq, cpu, cmdline); - - /* FIXME: Find a better method of setting IRQ affinity... - */ - - USERMODEHELPER(argv[0], argv, envp); -#endif -} - -int -ksocknal_lib_get_conn_addrs (ksock_conn_t *conn) -{ - int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1, - &conn->ksnc_ipaddr, - &conn->ksnc_port); - - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... 
*/ - LASSERT (!conn->ksnc_closing); - - if (rc != 0) { - CERROR ("Error %d getting sock peer IP\n", rc); - return rc; - } - - rc = libcfs_sock_getaddr(conn->ksnc_sock, 0, - &conn->ksnc_myipaddr, NULL); - if (rc != 0) { - CERROR ("Error %d getting sock local IP\n", rc); - return rc; - } - - return 0; -} - -unsigned int -ksocknal_lib_sock_irq (struct socket *sock) -{ - int irq = 0; -#if CPU_AFFINITY - struct dst_entry *dst; - - if (!*ksocknal_tunables.ksnd_irq_affinity) - return 0; - - dst = sk_dst_get (sock->sk); - if (dst != NULL) { - if (dst->dev != NULL) { - irq = dst->dev->irq; - if (irq >= NR_IRQS) { - CERROR ("Unexpected IRQ %x\n", irq); - irq = 0; - } - } - dst_release (dst); - } - -#endif - return irq; -} - -int -ksocknal_lib_zc_capable(struct socket *sock) -{ - int caps = sock->sk->sk_route_caps; - - /* ZC if the socket supports scatter/gather and doesn't need software - * checksums */ - return ((caps & NETIF_F_SG) != 0 && - (caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) != 0); -} - -int -ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; - int nob; - int rc; - - if (*ksocknal_tunables.ksnd_enable_csum && /* checksum enabled */ - conn->ksnc_proto == &ksocknal_protocol_v2x && /* V2.x connection */ - tx->tx_nob == tx->tx_resid && /* frist sending */ - tx->tx_msg.ksm_csum == 0) /* not checksummed */ - ksocknal_lib_csum_tx(tx); - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - - { -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_niov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = tx->tx_iov[i]; - nob += scratchiov[i].iov_len; - } - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - set_fs (KERNEL_DS); - rc = sock_sendmsg(sock, &msg, nob); - set_fs (oldmm); - } - return rc; -} - -int -ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; - lnet_kiov_t *kiov = tx->tx_kiov; - int rc; - int nob; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - - if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag && - tx->tx_msg.ksm_zc_req_cookie != 0) { - /* Zero copy is enabled */ - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int fragsize = kiov->kiov_len; - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->kiov_len); - - if (!list_empty(&conn->ksnc_tx_queue) || - fragsize < tx->tx_resid) - msgflg |= MSG_MORE; - - rc = tcp_sendpage(sock, page, offset, fragsize, msgflg); - } else { -#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else -#ifdef CONFIG_HIGHMEM -#warning "XXX risk of kmap deadlock on multiple frags..." 
-#endif - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - unsigned int niov = tx->tx_nkiov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - set_fs (KERNEL_DS); - rc = sock_sendmsg(sock, &msg, nob); - set_fs (oldmm); - - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); - } - return rc; -} - -void -ksocknal_lib_eager_ack (ksock_conn_t *conn) -{ - int opt = 1; - mm_segment_t oldmm = get_fs(); - struct socket *sock = conn->ksnc_sock; - - /* Remind the socket to ACK eagerly. If I don't, the socket might - * think I'm about to send something it could piggy-back the ACK - * on, introducing delay in completing zero-copy sends in my - * peer. */ - - set_fs(KERNEL_DS); - sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK, - (char *)&opt, sizeof (opt)); - set_fs(oldmm); -} - -int -ksocknal_lib_recv_iov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_niov; -#endif - struct iovec *iov = conn->ksnc_rx_iov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int nob; - int i; - int rc; - int fragnob; - int sum; - __u32 saved_csum; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - LASSERT (niov > 0); - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = iov[i]; - nob += scratchiov[i].iov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); - /* NB this is just a boolean..........................^ */ - set_fs (oldmm); - - saved_csum = 0; - if (conn->ksnc_proto == &ksocknal_protocol_v2x) { - saved_csum = conn->ksnc_msg.ksm_csum; - conn->ksnc_msg.ksm_csum = 0; - } - - if (saved_csum != 0) { - /* accumulate checksum */ - for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { - LASSERT (i < niov); - - fragnob = iov[i].iov_len; - if (fragnob > sum) - fragnob = sum; - - conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, - iov[i].iov_base, fragnob); - } - conn->ksnc_msg.ksm_csum = saved_csum; - } - - return rc; -} - -int -ksocknal_lib_recv_kiov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - unsigned int niov = 1; -#else -#ifdef CONFIG_HIGHMEM -#warning "XXX risk of kmap deadlock on multiple frags..." -#endif - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - unsigned int niov = conn->ksnc_rx_nkiov; -#endif - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int nob; - int i; - int rc; - void *base; - int sum; - int fragnob; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); - /* NB this is just a boolean.......................^ */ - set_fs (oldmm); - - if (conn->ksnc_msg.ksm_csum != 0) { - for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { - LASSERT (i < niov); - - /* Dang! have to kmap again because I have nowhere to stash the - * mapped address. But by doing it while the page is still - * mapped, the kernel just bumps the map count and returns me - * the address it stashed. */ - base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - fragnob = kiov[i].kiov_len; - if (fragnob > sum) - fragnob = sum; - - conn->ksnc_rx_csum = ksocknal_csum(conn->ksnc_rx_csum, - base, fragnob); - - kunmap(kiov[i].kiov_page); - } - } - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); - - return (rc); -} - -void ksocknal_lib_csum_tx(ksock_tx_t *tx) -{ - int i; - __u32 csum; - void *base; - - LASSERT(tx->tx_iov[0].iov_base == (void *)&tx->tx_msg); - LASSERT(tx->tx_conn != NULL); - LASSERT(tx->tx_conn->ksnc_proto == &ksocknal_protocol_v2x); - - tx->tx_msg.ksm_csum = 0; - - csum = ksocknal_csum(~0, (void *)tx->tx_iov[0].iov_base, - tx->tx_iov[0].iov_len); - - if (tx->tx_kiov != NULL) { - for (i = 0; i < tx->tx_nkiov; i++) { - base = kmap(tx->tx_kiov[i].kiov_page) + - tx->tx_kiov[i].kiov_offset; - - csum = ksocknal_csum(csum, base, tx->tx_kiov[i].kiov_len); - - kunmap(tx->tx_kiov[i].kiov_page); - } - } else { - for (i = 1; i < tx->tx_niov; i++) - csum = ksocknal_csum(csum, tx->tx_iov[i].iov_base, - tx->tx_iov[i].iov_len); - } - - if (*ksocknal_tunables.ksnd_inject_csum_error) { - csum++; - *ksocknal_tunables.ksnd_inject_csum_error = 0; - } - - tx->tx_msg.ksm_csum = csum; -} - -int -ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int 
*rxmem, int *nagle) -{ - mm_segment_t oldmm = get_fs (); - struct socket *sock = conn->ksnc_sock; - int len; - int rc; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return (-ESHUTDOWN); - } - - rc = libcfs_sock_getbuf(sock, txmem, rxmem); - if (rc == 0) { - len = sizeof(*nagle); - set_fs(KERNEL_DS); - rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)nagle, &len); - set_fs(oldmm); - } - - ksocknal_connsock_decref(conn); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - return (rc); -} - -int -ksocknal_lib_setup_sock (struct socket *sock) -{ - mm_segment_t oldmm = get_fs (); - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - struct linger linger; - - sock->sk->sk_allocation = GFP_NOFS; - - /* Ensure this socket aborts active sends immediately when we close - * it. */ - - linger.l_onoff = 0; - linger.l_linger = 0; - - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER, - (char *)&linger, sizeof (linger)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_LINGER: %d\n", rc); - return (rc); - } - - option = -1; - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_LINGER2: %d\n", rc); - return (rc); - } - - if (!*ksocknal_tunables.ksnd_nagle) { - option = 1; - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - } - - rc = libcfs_sock_setbuf(sock, - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size); - if (rc != 0) { - CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n", - *ksocknal_tunables.ksnd_tx_buffer_size, - *ksocknal_tunables.ksnd_rx_buffer_size, rc); 
- return (rc); - } - -/* TCP_BACKOFF_* sockopt tunables unsupported in stock kernels */ -#ifdef SOCKNAL_BACKOFF - if (*ksocknal_tunables.ksnd_backoff_init > 0) { - option = *ksocknal_tunables.ksnd_backoff_init; - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_BACKOFF_INIT, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set initial tcp backoff %d: %d\n", - option, rc); - return (rc); - } - } - - if (*ksocknal_tunables.ksnd_backoff_max > 0) { - option = *ksocknal_tunables.ksnd_backoff_max; - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_BACKOFF_MAX, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set maximum tcp backoff %d: %d\n", - option, rc); - return (rc); - } - } -#endif - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - - option = (do_keepalive ? 
1 : 0); - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - return (rc); - } - - if (!do_keepalive) - return (0); - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keep_idle, sizeof (keep_idle)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc); - return (rc); - } - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keep_intvl, sizeof (keep_intvl)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc); - return (rc); - } - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keep_count, sizeof (keep_count)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPCNT: %d\n", rc); - return (rc); - } - - return (0); -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - return &(sk->tp_pinfo.af_tcp); -} -#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)) -#define sock2tcp_opt(sk) tcp_sk(sk) -#else -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - struct tcp_sock *s = (struct tcp_sock *)sk; - return &s->tcp; -} -#endif - -void -ksocknal_lib_push_conn (ksock_conn_t *conn) -{ - struct sock *sk; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)) - struct tcp_opt *tp; -#else - struct tcp_sock *tp; -#endif - int nonagle; - int val = 1; - int rc; - mm_segment_t oldmm; - - rc = ksocknal_connsock_addref(conn); - if (rc != 0) /* being shut down */ - return; - - sk = conn->ksnc_sock->sk; - tp = sock2tcp_opt(sk); - - lock_sock (sk); - nonagle = tp->nonagle; - tp->nonagle = 1; - release_sock (sk); - - oldmm = get_fs (); - set_fs (KERNEL_DS); - - rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof (val)); - LASSERT (rc == 0); - - set_fs (oldmm); - - lock_sock (sk); 
- tp->nonagle = nonagle; - release_sock (sk); - - ksocknal_connsock_decref(conn); -} - -extern void ksocknal_read_callback (ksock_conn_t *conn); -extern void ksocknal_write_callback (ksock_conn_t *conn); -/* - * socket call back in Linux - */ -static void -ksocknal_data_ready (struct sock *sk, int n) -{ - ksock_conn_t *conn; - ENTRY; - - /* interleave correctly with closing sockets... */ - LASSERT(!in_irq()); - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - if (conn == NULL) { /* raced with ksocknal_terminate_conn */ - LASSERT (sk->sk_data_ready != &ksocknal_data_ready); - sk->sk_data_ready (sk, n); - } else - ksocknal_read_callback(conn); - - read_unlock (&ksocknal_data.ksnd_global_lock); - - EXIT; -} - -static void -ksocknal_write_space (struct sock *sk) -{ - ksock_conn_t *conn; - int wspace; - int min_wpace; - - /* interleave correctly with closing sockets... */ - LASSERT(!in_irq()); - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - wspace = SOCKNAL_WSPACE(sk); - min_wpace = SOCKNAL_MIN_WSPACE(sk); - - CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", - sk, wspace, min_wpace, conn, - (conn == NULL) ? "" : (conn->ksnc_tx_ready ? - " ready" : " blocked"), - (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? - " scheduled" : " idle"), - (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? - " empty" : " queued")); - - if (conn == NULL) { /* raced with ksocknal_terminate_conn */ - LASSERT (sk->sk_write_space != &ksocknal_write_space); - sk->sk_write_space (sk); - - read_unlock (&ksocknal_data.ksnd_global_lock); - return; - } - - if (wspace >= min_wpace) { /* got enough space */ - ksocknal_write_callback(conn); - - /* Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the - * ENOMEM check in ksocknal_transmit is race-free (think about - * it). 
*/ - - clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags); - } - - read_unlock (&ksocknal_data.ksnd_global_lock); -} - -void -ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) -{ - conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; - conn->ksnc_saved_write_space = sock->sk->sk_write_space; -} - -void -ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) -{ - sock->sk->sk_user_data = conn; - sock->sk->sk_data_ready = ksocknal_data_ready; - sock->sk->sk_write_space = ksocknal_write_space; - return; -} - -void -ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) -{ - /* Remove conn's network callbacks. - * NB I _have_ to restore the callback, rather than storing a noop, - * since the socket could survive past this module being unloaded!! */ - sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; - sock->sk->sk_write_space = conn->ksnc_saved_write_space; - - /* A callback could be in progress already; they hold a read lock - * on ksnd_global_lock (to serialise with me) and NOOP if - * sk_user_data is NULL. 
*/ - sock->sk->sk_user_data = NULL; - - return ; -} - diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.h b/lnet/klnds/socklnd/socklnd_lib-linux.h deleted file mode 100644 index 8a5462fd109790c616957d6b8716db2b0d253596..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-linux.h +++ /dev/null @@ -1,122 +0,0 @@ -#define DEBUG_PORTAL_ALLOC -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#ifndef __LINUX_SOCKNAL_LIB_H__ -#define __LINUX_SOCKNAL_LIB_H__ - -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/version.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <net/sock.h> -#include <net/tcp.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/irq.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <asm/uaccess.h> -#include <asm/segment.h> -#include <asm/div64.h> - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -# include <linux/syscalls.h> -#endif - -#include <libcfs/kp30.h> -#include <libcfs/linux/portals_compat25.h> - -#include <linux/crc32.h> -static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t len) -{ -#if 1 - return crc32_le(crc, p, len); -#else - while (len-- > 0) - crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ; - return crc; -#endif -} - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,7)) -# define SOCKNAL_WSPACE(sk) sk_stream_wspace(sk) -# define SOCKNAL_MIN_WSPACE(sk) sk_stream_min_wspace(sk) -#else -# define SOCKNAL_WSPACE(sk) tcp_wspace(sk) -# define SOCKNAL_MIN_WSPACE(sk) (((sk)->sk_sndbuf*8)/10) -#endif - -#ifndef CONFIG_SMP -static inline -int ksocknal_nsched(void) -{ - return 1; -} -#else -#include 
<linux/lustre_version.h> -# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT)) -static inline int -ksocknal_nsched(void) -{ - return num_online_cpus(); -} - -static inline int -ksocknal_sched2cpu(int i) -{ - return i; -} - -static inline int -ksocknal_irqsched2cpu(int i) -{ - return i; -} -# else -static inline int -ksocknal_nsched(void) -{ - if (smp_num_siblings == 1) - return (num_online_cpus()); - - /* We need to know if this assumption is crap */ - LASSERT (smp_num_siblings == 2); - return (num_online_cpus()/2); -} - -static inline int -ksocknal_sched2cpu(int i) -{ - if (smp_num_siblings == 1) - return i; - - return (i * 2); -} - -static inline int -ksocknal_irqsched2cpu(int i) -{ - return (ksocknal_sched2cpu(i) + 1); -} -# endif -#endif - -#endif diff --git a/lnet/klnds/socklnd/socklnd_lib-winnt.c b/lnet/klnds/socklnd/socklnd_lib-winnt.c deleted file mode 100755 index 7669c77b3cef00355b56e14cd43f924c5372a2ad..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-winnt.c +++ /dev/null @@ -1,832 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2006 Cluster File Systems, Inc, All rights reserved. - * Author: Matt Wu - * - * This file is part of Lustre, http://www.lustre.org. - * - * This Lustre Software is proprietary - please refer to the license - * agreement you received with your software. 
- * - * windows socknal library - * - */ - -#include "socklnd.h" - -# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM -static ctl_table ksocknal_ctl_table[18]; - -ctl_table ksocknal_top_ctl_table[] = { - {200, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, - { 0 } -}; - -int -ksocknal_lib_tunables_init () -{ - int i = 0; - int j = 1; - - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "timeout", ksocknal_tunables.ksnd_timeout, - sizeof (int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "credits", ksocknal_tunables.ksnd_credits, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "peer_credits", ksocknal_tunables.ksnd_peercredits, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "nconnds", ksocknal_tunables.ksnd_nconnds, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "min_reconnectms", ksocknal_tunables.ksnd_min_reconnectms, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "max_reconnectms", ksocknal_tunables.ksnd_max_reconnectms, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "eager_ack", ksocknal_tunables.ksnd_eager_ack, - sizeof (int), 0644, NULL, &proc_dointvec}; -#if SOCKNAL_ZC - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "zero_copy", ksocknal_tunables.ksnd_zc_min_frag, - sizeof (int), 0644, NULL, &proc_dointvec}; -#endif - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "typed", ksocknal_tunables.ksnd_typed_conns, - sizeof (int), 0444, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "min_bulk", ksocknal_tunables.ksnd_min_bulk, - sizeof (int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "buffer_size", ksocknal_tunables.ksnd_buffer_size, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "nagle", 
ksocknal_tunables.ksnd_nagle, - sizeof(int), 0644, NULL, &proc_dointvec}; -#if CPU_AFFINITY - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "irq_affinity", ksocknal_tunables.ksnd_irq_affinity, - sizeof(int), 0644, NULL, &proc_dointvec}; -#endif - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_idle", ksocknal_tunables.ksnd_keepalive_idle, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_count", ksocknal_tunables.ksnd_keepalive_count, - sizeof(int), 0644, NULL, &proc_dointvec}; - ksocknal_ctl_table[i++] = (ctl_table) - {j++, "keepalive_intvl", ksocknal_tunables.ksnd_keepalive_intvl, - sizeof(int), 0644, NULL, &proc_dointvec}; - - LASSERT (j == i+1); - LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0])); - - ksocknal_tunables.ksnd_sysctl = - register_sysctl_table(ksocknal_top_ctl_table, 0); - - if (ksocknal_tunables.ksnd_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ - if (ksocknal_tunables.ksnd_sysctl != NULL) - unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl); -} -#else -int -ksocknal_lib_tunables_init () -{ - return 0; -} - -void -ksocknal_lib_tunables_fini () -{ -} -#endif - -void -ksocknal_lib_bind_irq (unsigned int irq) -{ -} - -int -ksocknal_lib_get_conn_addrs (ksock_conn_t *conn) -{ - int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1, - &conn->ksnc_ipaddr, &conn->ksnc_port); - - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... 
*/ - LASSERT (!conn->ksnc_closing); - - if (rc != 0) { - CERROR ("Error %d getting sock peer IP\n", rc); - return rc; - } - - rc = libcfs_sock_getaddr(conn->ksnc_sock, 0, - &conn->ksnc_myipaddr, NULL); - if (rc != 0) { - CERROR ("Error %d getting sock local IP\n", rc); - return rc; - } - - return 0; -} - -unsigned int -ksocknal_lib_sock_irq (struct socket *sock) -{ - return 0; -} - -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) -static struct page * -ksocknal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) - page = vmalloc_to_page ((void *)vaddr); -#if CONFIG_HIGHMEM - else if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) - page = vmalloc_to_page ((void *)vaddr); - /* in 2.4 ^ just walks the page tables */ -#endif - else - page = virt_to_page (vaddr); - - if (page == NULL || - !VALID_PAGE (page)) - return (NULL); - - return (page); -} -#endif - -/* - * ks_lock_iovs - * Lock the i/o vector buffers into MDL structure - * - * Arguments: - * iov: the array of i/o vectors - * niov: number of i/o vectors to be locked - * len: the real length of the iov vectors - * - * Return Value: - * ksock_mdl_t *: the Mdl of the locked buffers or - * NULL pointer in failure case - * - * Notes: - * N/A - */ - -ksock_mdl_t * -ks_lock_iovs( - IN struct iovec *iov, - IN int niov, - IN int recving, - IN int * len ) -{ - int rc = 0; - - int i = 0; - int total = 0; - ksock_mdl_t * mdl = NULL; - ksock_mdl_t * tail = NULL; - - LASSERT(iov != NULL); - LASSERT(niov > 0); - LASSERT(len != NULL); - - for (i=0; i < niov; i++) { - - ksock_mdl_t * Iovec = NULL; - - rc = ks_lock_buffer( - iov[i].iov_base, - FALSE, - iov[i].iov_len, - recving ? 
IoWriteAccess : IoReadAccess, - &Iovec ); - - if (rc < 0) { - break; - } - - if (tail) { - tail->Next = Iovec; - } else { - mdl = Iovec; - } - - tail = Iovec; - - total +=iov[i].iov_len; - } - - if (rc >= 0) { - *len = total; - } else { - if (mdl) { - ks_release_mdl(mdl, FALSE); - mdl = NULL; - } - } - - return mdl; -} - -/* - * ks_lock_kiovs - * Lock the kiov pages into MDL structure - * - * Arguments: - * kiov: the array of kiov pages - * niov: number of kiov to be locked - * len: the real length of the kiov arrary - * - * Return Value: - * PMDL: the Mdl of the locked buffers or NULL - * pointer in failure case - * - * Notes: - * N/A - */ -ksock_mdl_t * -ks_lock_kiovs( - IN lnet_kiov_t * kiov, - IN int nkiov, - IN int recving, - IN int * len ) -{ - int rc = 0; - int i = 0; - int total = 0; - ksock_mdl_t * mdl = NULL; - ksock_mdl_t * tail = NULL; - - LASSERT(kiov != NULL); - LASSERT(nkiov > 0); - LASSERT(len != NULL); - - for (i=0; i < nkiov; i++) { - - ksock_mdl_t * Iovec = NULL; - - - // - // Lock the kiov page into Iovec ¡ - // - - rc = ks_lock_buffer( - (PUCHAR)kiov[i].kiov_page->addr + - kiov[i].kiov_offset, - FALSE, - kiov[i].kiov_len, - recving ? 
IoWriteAccess : IoReadAccess, - &Iovec - ); - - if (rc < 0) { - break; - } - - // - // Attach the Iovec to the mdl chain - // - - if (tail) { - tail->Next = Iovec; - } else { - mdl = Iovec; - } - - tail = Iovec; - - total += kiov[i].kiov_len; - - } - - if (rc >= 0) { - *len = total; - } else { - if (mdl) { - ks_release_mdl(mdl, FALSE); - mdl = NULL; - } - } - - return mdl; -} - - -int -ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) - unsigned long vaddr = (unsigned long)iov->iov_base - int offset = vaddr & (PAGE_SIZE - 1); - int zcsize = MIN (iov->iov_len, PAGE_SIZE - offset); - struct page *page; -#endif - int nob; - int rc; - ksock_mdl_t * mdl; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) - if (zcsize >= ksocknal_data.ksnd_zc_min_frag && - (sock->sk->sk_route_caps & NETIF_F_SG) && - (sock->sk->sk_route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && - (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) { - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n", - (void *)vaddr, page, page_address(page), offset, zcsize); - - if (!list_empty (&conn->ksnc_tx_queue) || - zcsize < tx->tx_resid) - msgflg |= MSG_MORE; - - rc = tcp_sendpage_zccd(sock, page, offset, zcsize, msgflg, &tx->tx_zccd); - } else -#endif - { - /* lock the whole tx iovs into a single mdl chain */ - mdl = ks_lock_iovs(tx->tx_iov, tx->tx_niov, FALSE, &nob); - - if (mdl) { - /* send the total mdl chain */ - rc = ks_send_mdl( conn->ksnc_sock, tx, mdl, nob, - (!list_empty (&conn->ksnc_tx_queue) || nob < tx->tx_resid) ? 
- (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT); - } else { - rc = -ENOMEM; - } - } - - return rc; -} - -int -ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; - lnet_kiov_t *kiov = tx->tx_kiov; - int rc; - int nob; - ksock_mdl_t * mdl; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - -#if SOCKNAL_ZC - if (kiov->kiov_len >= *ksocknal_tunables.ksnd_zc_min_frag && - (sock->sk->sk_route_caps & NETIF_F_SG) && - (sock->sk->sk_route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) { - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int fragsize = kiov->kiov_len; - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->kiov_len); - - if (!list_empty(&conn->ksnc_tx_queue) || - fragsize < tx->tx_resid) - msgflg |= MSG_MORE; - - rc = tcp_sendpage_zccd(sock, page, offset, fragsize, msgflg, - &tx->tx_zccd); - } else -#endif - { - /* lock the whole tx kiovs into a single mdl chain */ - mdl = ks_lock_kiovs(tx->tx_kiov, tx->tx_nkiov, FALSE, &nob); - - if (mdl) { - /* send the total mdl chain */ - rc = ks_send_mdl( - conn->ksnc_sock, tx, mdl, nob, - (!list_empty(&conn->ksnc_tx_queue) || nob < tx->tx_resid) ? 
- (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT); - } else { - rc = -ENOMEM; - } - } - - return rc; -} - - -int -ksocknal_lib_recv_iov (ksock_conn_t *conn) -{ - struct iovec *iov = conn->ksnc_rx_iov; - int rc; - int size; - ksock_mdl_t * mdl; - - /* lock the whole tx iovs into a single mdl chain */ - mdl = ks_lock_iovs(iov, conn->ksnc_rx_niov, TRUE, &size); - - if (!mdl) { - return (-ENOMEM); - } - - LASSERT (size <= conn->ksnc_rx_nob_wanted); - - /* try to request data for the whole mdl chain */ - rc = ks_recv_mdl (conn->ksnc_sock, mdl, size, MSG_DONTWAIT); - - return rc; -} - -int -ksocknal_lib_recv_kiov (ksock_conn_t *conn) -{ - lnet_kiov_t *kiov = conn->ksnc_rx_kiov; - int size; - int rc; - ksock_mdl_t * mdl; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only receive 1 frag at a time. */ - LASSERT (conn->ksnc_rx_nkiov > 0); - - /* lock the whole tx kiovs into a single mdl chain */ - mdl = ks_lock_kiovs(kiov, conn->ksnc_rx_nkiov, TRUE, &size); - - if (!mdl) { - rc = -ENOMEM; - return (rc); - } - - LASSERT (size <= conn->ksnc_rx_nob_wanted); - - /* try to request data for the whole mdl chain */ - rc = ks_recv_mdl (conn->ksnc_sock, mdl, size, MSG_DONTWAIT); - - return rc; -} - -void -ksocknal_lib_eager_ack (ksock_conn_t *conn) -{ - __u32 option = 1; - int rc = 0; - - rc = ks_set_tcp_option( - conn->ksnc_sock, TCP_SOCKET_NODELAY, - &option, sizeof(option) ); - if (rc != 0) { - CERROR("Can't disable nagle: %d\n", rc); - } -} - -int -ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) -{ - ksock_tconn_t * tconn = conn->ksnc_sock; - int len; - int rc; - - ks_get_tconn (tconn); - - *txmem = *rxmem = 0; - - len = sizeof(*nagle); - - rc = ks_get_tcp_option( - tconn, TCP_SOCKET_NODELAY, - (__u32 *)nagle, &len); - - ks_put_tconn (tconn); - - printk("ksocknal_get_conn_tunables: nodelay = %d rc = %d\n", *nagle, rc); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - 
return (rc); -} - -int -ksocknal_lib_buffersize (int current_sz, int tunable_sz) -{ - /* ensure >= SOCKNAL_MIN_BUFFER */ - if (current_sz < SOCKNAL_MIN_BUFFER) - return MAX(SOCKNAL_MIN_BUFFER, tunable_sz); - - if (tunable_sz > SOCKNAL_MIN_BUFFER) - return tunable_sz; - - /* leave alone */ - return 0; -} - -int -ksocknal_lib_setup_sock (struct socket *sock) -{ - int rc; - - int keep_idle; - int keep_count; - int keep_intvl; - int keep_alive; - - __u32 option; - - /* set the window size */ - -#if 0 - tconn->kstc_snd_wnd = ksocknal_tunables.ksnd_buffer_size; - tconn->kstc_rcv_wnd = ksocknal_tunables.ksnd_buffer_size; -#endif - - /* disable nagle */ - if (!ksocknal_tunables.ksnd_nagle) { - option = 1; - - rc = ks_set_tcp_option( - sock, TCP_SOCKET_NODELAY, - &option, sizeof (option)); - if (rc != 0) { - printk ("Can't disable nagle: %d\n", rc); - return (rc); - } - } - - /* snapshot tunables */ - keep_idle = *ksocknal_tunables.ksnd_keepalive_idle; - keep_count = *ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl; - - keep_alive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - - option = (__u32)(keep_alive ? 
1 : 0); - - rc = ks_set_tcp_option( - sock, TCP_SOCKET_KEEPALIVE, - &option, sizeof (option)); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - - return (0); -} - -void -ksocknal_lib_push_conn (ksock_conn_t *conn) -{ - ksock_tconn_t * tconn; - __u32 nagle; - __u32 val = 1; - int rc; - - tconn = conn->ksnc_sock; - - ks_get_tconn(tconn); - - spin_lock(&tconn->kstc_lock); - if (tconn->kstc_type == kstt_sender) { - nagle = tconn->sender.kstc_info.nagle; - tconn->sender.kstc_info.nagle = 0; - } else { - LASSERT(tconn->kstc_type == kstt_child); - nagle = tconn->child.kstc_info.nagle; - tconn->child.kstc_info.nagle = 0; - } - - spin_unlock(&tconn->kstc_lock); - - val = 1; - rc = ks_set_tcp_option( - tconn, - TCP_SOCKET_NODELAY, - &(val), - sizeof(__u32) - ); - - LASSERT (rc == 0); - spin_lock(&tconn->kstc_lock); - - if (tconn->kstc_type == kstt_sender) { - tconn->sender.kstc_info.nagle = nagle; - } else { - LASSERT(tconn->kstc_type == kstt_child); - tconn->child.kstc_info.nagle = nagle; - } - spin_unlock(&tconn->kstc_lock); - - ks_put_tconn(tconn); -} - -/* @mode: 0: receiving mode / 1: sending mode */ -void -ksocknal_sched_conn (ksock_conn_t *conn, int mode, ksock_tx_t *tx) -{ - int flags; - ksock_sched_t * sched; - ENTRY; - - /* interleave correctly with closing sockets... */ - read_lock (&ksocknal_data.ksnd_global_lock); - - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); - - if (mode) { /* transmission can continue ... 
*/ - - conn->ksnc_tx_ready = 1; - - if (tx) { - /* Incomplete send: place tx on HEAD of tx_queue */ - list_add (&tx->tx_list, &conn->ksnc_tx_queue); - } - - if ( !conn->ksnc_tx_scheduled && - !list_empty(&conn->ksnc_tx_queue)) { //packets to send - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_conn_refcount); - - cfs_waitq_signal (&sched->kss_waitq); - } - } else { /* receiving can continue ... */ - - conn->ksnc_rx_ready = 1; - - if ( !conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_conn_refcount); - - cfs_waitq_signal (&sched->kss_waitq); - } - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - read_unlock (&ksocknal_data.ksnd_global_lock); - - EXIT; -} - -void ksocknal_schedule_callback(struct socket*sock, int mode, void * tx, ulong_ptr bytes) -{ - ksock_conn_t * conn = (ksock_conn_t *) sock->kstc_conn; - - if (mode) { - ksocknal_sched_conn(conn, mode, tx); - } else { - if ( CAN_BE_SCHED(bytes, (ulong_ptr)conn->ksnc_rx_nob_wanted )) { - ksocknal_sched_conn(conn, mode, tx); - } - } -} - -extern void -ksocknal_tx_launched (ksock_tx_t *tx); - -void -ksocknal_fini_sending(ksock_tcpx_fini_t *tcpx) -{ - ksocknal_tx_launched(tcpx->tx); - cfs_free(tcpx); -} - -void * -ksocknal_update_tx( - struct socket* tconn, - void * txp, - ulong_ptr rc - ) -{ - ksock_tx_t * tx = (ksock_tx_t *)txp; - - /* - * the transmission was done, we need update the tx - */ - - LASSERT(tx->tx_resid >= (int)rc); - tx->tx_resid -= (int)rc; - - /* - * just partial of tx is sent out, we need update - * the fields of tx and schedule later transmission. 
- */ - - if (tx->tx_resid) { - - if (tx->tx_niov > 0) { - - /* if there's iov, we need process iov first */ - while (rc > 0 ) { - if (rc < tx->tx_iov->iov_len) { - /* didn't send whole iov entry... */ - tx->tx_iov->iov_base = - (char *)(tx->tx_iov->iov_base) + rc; - tx->tx_iov->iov_len -= rc; - rc = 0; - } else { - /* the whole of iov was sent out */ - rc -= tx->tx_iov->iov_len; - tx->tx_iov++; - tx->tx_niov--; - } - } - - } else { - - /* now we need process the kiov queues ... */ - - while (rc > 0 ) { - - if (rc < tx->tx_kiov->kiov_len) { - /* didn't send whole kiov entry... */ - tx->tx_kiov->kiov_offset += rc; - tx->tx_kiov->kiov_len -= rc; - rc = 0; - } else { - /* whole kiov was sent out */ - rc -= tx->tx_kiov->kiov_len; - tx->tx_kiov++; - tx->tx_nkiov--; - } - } - } - - } else { - - ksock_tcpx_fini_t * tcpx = - cfs_alloc(sizeof(ksock_tcpx_fini_t), CFS_ALLOC_ZERO); - - ASSERT(tx->tx_resid == 0); - - if (!tcpx) { - - ksocknal_tx_launched (tx); - - } else { - - tcpx->tx = tx; - ExInitializeWorkItem( - &(tcpx->item), - ksocknal_fini_sending, - tcpx - ); - ExQueueWorkItem( - &(tcpx->item), - CriticalWorkQueue - ); - } - - tx = NULL; - } - - return (void *)tx; -} - -void -ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) -{ -} - -void -ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) -{ - sock->kstc_conn = conn; - sock->kstc_sched_cb = ksocknal_schedule_callback; - sock->kstc_update_tx = ksocknal_update_tx; -} - -void -ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) -{ - sock->kstc_conn = NULL; - sock->kstc_sched_cb = NULL; - sock->kstc_update_tx = NULL; -} - diff --git a/lnet/klnds/socklnd/socklnd_lib-winnt.h b/lnet/klnds/socklnd/socklnd_lib-winnt.h deleted file mode 100755 index 492c9f595ee558d90b13f7d98a699c329111a867..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_lib-winnt.h +++ /dev/null @@ -1,42 +0,0 @@ -#define DEBUG_PORTAL_ALLOC -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB 
-#endif - -#ifndef __WINNT_TDILND_LIB_H__ -#define __WINNT_TDILND_LIB_H__ - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#ifndef CONFIG_SMP - -static inline -int ksocknal_nsched(void) -{ - return 1; -} - -#else - -static inline int -ksocknal_nsched(void) -{ - return num_online_cpus(); -} - -static inline int -ksocknal_sched2cpu(int i) -{ - return i; -} - -static inline int -ksocknal_irqsched2cpu(int i) -{ - return i; -} - -#endif - -#endif diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c deleted file mode 100644 index 917d4d7ee87513883c3ef5f3365368a534712960..0000000000000000000000000000000000000000 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ /dev/null @@ -1,156 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "socklnd.h" - -static int sock_timeout = 50; -CFS_MODULE_PARM(sock_timeout, "i", int, 0644, - "dead socket timeout (seconds)"); - -static int credits = 256; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int nconnds = 4; -CFS_MODULE_PARM(nconnds, "i", int, 0444, - "# connection daemons"); - -static int min_reconnectms = 1000; -CFS_MODULE_PARM(min_reconnectms, "i", int, 0644, - "min connection retry interval (mS)"); - -static int max_reconnectms = 60000; -CFS_MODULE_PARM(max_reconnectms, "i", int, 0644, - "max connection retry interval (mS)"); - -#if defined(__APPLE__) && !defined(__DARWIN8__) -# define DEFAULT_EAGER_ACK 1 -#else -# define DEFAULT_EAGER_ACK 0 -#endif -static int eager_ack = DEFAULT_EAGER_ACK; -CFS_MODULE_PARM(eager_ack, "i", int, 0644, - "send tcp ack packets eagerly"); - -static int typed_conns = 1; -CFS_MODULE_PARM(typed_conns, "i", int, 0444, - "use different sockets for bulk"); - -static int min_bulk = (1<<10); -CFS_MODULE_PARM(min_bulk, "i", int, 0644, - "smallest 'large' message"); - -#ifdef __APPLE__ -# ifdef __DARWIN8__ -# define DEFAULT_BUFFER_SIZE (224*1024) -# else -# define DEFAULT_BUFFER_SIZE (1152 * 1024) -# endif -#else -# define DEFAULT_BUFFER_SIZE 0 -#endif -static int tx_buffer_size = DEFAULT_BUFFER_SIZE; -CFS_MODULE_PARM(tx_buffer_size, "i", int, 0644, - "socket tx buffer size (0 for system default)"); - -static int rx_buffer_size = DEFAULT_BUFFER_SIZE; -CFS_MODULE_PARM(rx_buffer_size, "i", int, 0644, - "socket rx buffer size (0 for system default)"); - -static int nagle = 0; -CFS_MODULE_PARM(nagle, "i", int, 0644, - "enable NAGLE?"); - -static int keepalive_idle = 30; -CFS_MODULE_PARM(keepalive_idle, "i", int, 0644, - "# idle seconds before probe"); - -#ifdef HAVE_BGL_SUPPORT -#define DEFAULT_KEEPALIVE_COUNT 100 -#else -#define DEFAULT_KEEPALIVE_COUNT 5 -#endif -static 
int keepalive_count = DEFAULT_KEEPALIVE_COUNT; -CFS_MODULE_PARM(keepalive_count, "i", int, 0644, - "# missed probes == dead"); - -static int keepalive_intvl = 5; -CFS_MODULE_PARM(keepalive_intvl, "i", int, 0644, - "seconds between probes"); - -static int enable_csum = 0; -CFS_MODULE_PARM(enable_csum, "i", int, 0644, - "enable check sum"); - -static int inject_csum_error = 0; -CFS_MODULE_PARM(inject_csum_error, "i", int, 0644, - "set non-zero to inject a checksum error"); -#ifdef CPU_AFFINITY -static int enable_irq_affinity = 1; -CFS_MODULE_PARM(enable_irq_affinity, "i", int, 0644, - "enable IRQ affinity"); -#endif - -static unsigned int zc_min_frag = (2<<10); -CFS_MODULE_PARM(zc_min_frag, "i", int, 0644, - "minimum fragment to zero copy"); - -#ifdef SOCKNAL_BACKOFF -static int backoff_init = 3; -CFS_MODULE_PARM(backoff_init, "i", int, 0644, - "seconds for initial tcp backoff"); - -static int backoff_max = 3; -CFS_MODULE_PARM(backoff_max, "i", int, 0644, - "seconds for maximum tcp backoff"); -#endif - -ksock_tunables_t ksocknal_tunables = { - .ksnd_timeout = &sock_timeout, - .ksnd_credits = &credits, - .ksnd_peercredits = &peer_credits, - .ksnd_nconnds = &nconnds, - .ksnd_min_reconnectms = &min_reconnectms, - .ksnd_max_reconnectms = &max_reconnectms, - .ksnd_eager_ack = &eager_ack, - .ksnd_typed_conns = &typed_conns, - .ksnd_min_bulk = &min_bulk, - .ksnd_tx_buffer_size = &tx_buffer_size, - .ksnd_rx_buffer_size = &rx_buffer_size, - .ksnd_nagle = &nagle, - .ksnd_keepalive_idle = &keepalive_idle, - .ksnd_keepalive_count = &keepalive_count, - .ksnd_keepalive_intvl = &keepalive_intvl, - .ksnd_enable_csum = &enable_csum, - .ksnd_inject_csum_error = &inject_csum_error, - .ksnd_zc_min_frag = &zc_min_frag, -#ifdef CPU_AFFINITY - .ksnd_irq_affinity = &enable_irq_affinity, -#endif -#ifdef SOCKNAL_BACKOFF - .ksnd_backoff_init = &backoff_init, - .ksnd_backoff_max = &backoff_max, -#endif -}; - diff --git a/lnet/klnds/viblnd/.cvsignore b/lnet/klnds/viblnd/.cvsignore deleted file 
mode 100644 index 2e9b6f47052e4a9724b08b6336229b01d72676a4..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend -wirecheck diff --git a/lnet/klnds/viblnd/Makefile.in b/lnet/klnds/viblnd/Makefile.in deleted file mode 100644 index 5b5c2db4ad030cd840b6cb78f301a9ce4a87d396..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/Makefile.in +++ /dev/null @@ -1,6 +0,0 @@ -MODULES := kviblnd -kviblnd-objs := viblnd.o viblnd_cb.o viblnd_modparams.o - -EXTRA_POST_CFLAGS := @VIBCPPFLAGS@ - -@INCLUDE_RULES@ diff --git a/lnet/klnds/viblnd/autoMakefile.am b/lnet/klnds/viblnd/autoMakefile.am deleted file mode 100644 index 19861a9fe6186728ccf1a6c821beb97ad32c08e1..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/autoMakefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -if MODULES -if BUILD_VIBLND -modulenet_DATA = kviblnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -DIST_SOURCES = $(kviblnd-objs:%.o=%.c) viblnd.h viblnd_wire.h diff --git a/lnet/klnds/viblnd/viblnd.c b/lnet/klnds/viblnd/viblnd.c deleted file mode 100644 index 0d738a10e5fcc9d365b8d32b8f54de2f70285a21..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd.c +++ /dev/null @@ -1,2014 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Frank Zago <fzago@systemfabricworks.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "viblnd.h" - -lnd_t the_kiblnd = { - .lnd_type = VIBLND, - .lnd_startup = kibnal_startup, - .lnd_shutdown = kibnal_shutdown, - .lnd_ctl = kibnal_ctl, - .lnd_send = kibnal_send, - .lnd_recv = kibnal_recv, - .lnd_eager_recv = kibnal_eager_recv, -}; - -kib_data_t kibnal_data; - -void vibnal_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux robert 2.6.11-1.27_FC3 #1 Tue May 17 20:27:37 EDT 2005 i686 athlon i386 G - * with gcc version 3.4.3 20050227 (Red Hat 3.4.3-22.fc3) */ - - - /* Constants... 
*/ - CLASSERT (IBNAL_MSG_MAGIC == 0x0be91b91); - CLASSERT (IBNAL_MSG_VERSION == 0x11); - CLASSERT (IBNAL_MSG_CONNREQ == 0xc0); - CLASSERT (IBNAL_MSG_CONNACK == 0xc1); - CLASSERT (IBNAL_MSG_NOOP == 0xd0); - CLASSERT (IBNAL_MSG_IMMEDIATE == 0xd1); - CLASSERT (IBNAL_MSG_PUT_REQ == 0xd2); - CLASSERT (IBNAL_MSG_PUT_NAK == 0xd3); - CLASSERT (IBNAL_MSG_PUT_ACK == 0xd4); - CLASSERT (IBNAL_MSG_PUT_DONE == 0xd5); - CLASSERT (IBNAL_MSG_GET_REQ == 0xd6); - CLASSERT (IBNAL_MSG_GET_DONE == 0xd7); - - /* Checks for struct kib_connparams_t */ - CLASSERT ((int)sizeof(kib_connparams_t) == 12); - CLASSERT ((int)offsetof(kib_connparams_t, ibcp_queue_depth) == 0); - CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_queue_depth) == 4); - CLASSERT ((int)offsetof(kib_connparams_t, ibcp_max_msg_size) == 4); - CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_max_msg_size) == 4); - CLASSERT ((int)offsetof(kib_connparams_t, ibcp_max_frags) == 8); - CLASSERT ((int)sizeof(((kib_connparams_t *)0)->ibcp_max_frags) == 4); - - /* Checks for struct kib_immediate_msg_t */ - CLASSERT ((int)sizeof(kib_immediate_msg_t) == 72); - CLASSERT ((int)offsetof(kib_immediate_msg_t, ibim_hdr) == 0); - CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_hdr) == 72); - CLASSERT ((int)offsetof(kib_immediate_msg_t, ibim_payload[13]) == 85); - CLASSERT ((int)sizeof(((kib_immediate_msg_t *)0)->ibim_payload[13]) == 1); - CLASSERT (IBNAL_USE_FMR == 1); - - /* Checks for struct kib_rdma_desc_t */ - CLASSERT ((int)sizeof(kib_rdma_desc_t) == 16); - CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_addr) == 0); - CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_addr) == 8); - CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_nob) == 8); - CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_nob) == 4); - CLASSERT ((int)offsetof(kib_rdma_desc_t, rd_key) == 12); - CLASSERT ((int)sizeof(((kib_rdma_desc_t *)0)->rd_key) == 4); - - /* Checks for struct kib_putreq_msg_t */ - CLASSERT ((int)sizeof(kib_putreq_msg_t) == 80); - CLASSERT 
((int)offsetof(kib_putreq_msg_t, ibprm_hdr) == 0); - CLASSERT ((int)sizeof(((kib_putreq_msg_t *)0)->ibprm_hdr) == 72); - CLASSERT ((int)offsetof(kib_putreq_msg_t, ibprm_cookie) == 72); - CLASSERT ((int)sizeof(((kib_putreq_msg_t *)0)->ibprm_cookie) == 8); - - /* Checks for struct kib_putack_msg_t */ - CLASSERT ((int)sizeof(kib_putack_msg_t) == 32); - CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_src_cookie) == 0); - CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_src_cookie) == 8); - CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_dst_cookie) == 8); - CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_dst_cookie) == 8); - CLASSERT ((int)offsetof(kib_putack_msg_t, ibpam_rd) == 16); - CLASSERT ((int)sizeof(((kib_putack_msg_t *)0)->ibpam_rd) == 16); - - /* Checks for struct kib_get_msg_t */ - CLASSERT ((int)sizeof(kib_get_msg_t) == 96); - CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_hdr) == 0); - CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_hdr) == 72); - CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_cookie) == 72); - CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_cookie) == 8); - CLASSERT ((int)offsetof(kib_get_msg_t, ibgm_rd) == 80); - CLASSERT ((int)sizeof(((kib_get_msg_t *)0)->ibgm_rd) == 16); - - /* Checks for struct kib_completion_msg_t */ - CLASSERT ((int)sizeof(kib_completion_msg_t) == 12); - CLASSERT ((int)offsetof(kib_completion_msg_t, ibcm_cookie) == 0); - CLASSERT ((int)sizeof(((kib_completion_msg_t *)0)->ibcm_cookie) == 8); - CLASSERT ((int)offsetof(kib_completion_msg_t, ibcm_status) == 8); - CLASSERT ((int)sizeof(((kib_completion_msg_t *)0)->ibcm_status) == 4); - - /* Checks for struct kib_msg_t */ - CLASSERT ((int)sizeof(kib_msg_t) == 152); - CLASSERT ((int)offsetof(kib_msg_t, ibm_magic) == 0); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_magic) == 4); - CLASSERT ((int)offsetof(kib_msg_t, ibm_version) == 4); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_version) == 2); - CLASSERT ((int)offsetof(kib_msg_t, ibm_type) == 6); - CLASSERT 
((int)sizeof(((kib_msg_t *)0)->ibm_type) == 1); - CLASSERT ((int)offsetof(kib_msg_t, ibm_credits) == 7); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_credits) == 1); - CLASSERT ((int)offsetof(kib_msg_t, ibm_nob) == 8); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_nob) == 4); - CLASSERT ((int)offsetof(kib_msg_t, ibm_cksum) == 12); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_cksum) == 4); - CLASSERT ((int)offsetof(kib_msg_t, ibm_srcnid) == 16); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_srcnid) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_srcstamp) == 24); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_srcstamp) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_dstnid) == 32); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_dstnid) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_dststamp) == 40); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_dststamp) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_seq) == 48); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_seq) == 8); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.connparams) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.connparams) == 12); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.immediate) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.immediate) == 72); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putreq) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putreq) == 80); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.putack) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.putack) == 32); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.get) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.get) == 96); - CLASSERT ((int)offsetof(kib_msg_t, ibm_u.completion) == 56); - CLASSERT ((int)sizeof(((kib_msg_t *)0)->ibm_u.completion) == 12); -} - -__u32 -kibnal_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -void -kibnal_init_msg(kib_msg_t *msg, int type, int body_nob) -{ - msg->ibm_type = type; - msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob; -} - -void -kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, - lnet_nid_t dstnid, __u64 dststamp, __u64 seq) -{ - /* CAVEAT EMPTOR! all message fields not set here should have been - * initialised previously. */ - msg->ibm_magic = IBNAL_MSG_MAGIC; - msg->ibm_version = version; - /* ibm_type */ - msg->ibm_credits = credits; - /* ibm_nob */ - msg->ibm_cksum = 0; - msg->ibm_srcnid = lnet_ptlcompat_srcnid(kibnal_data.kib_ni->ni_nid, - dstnid); - msg->ibm_srcstamp = kibnal_data.kib_incarnation; - msg->ibm_dstnid = dstnid; - msg->ibm_dststamp = dststamp; - msg->ibm_seq = seq; - - if (*kibnal_tunables.kib_cksum) { - /* NB ibm_cksum zero while computing cksum */ - msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob); - } -} - -int -kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob) -{ - const int hdr_size = offsetof(kib_msg_t, ibm_u); - __u32 msg_cksum; - __u32 msg_version; - int flip; - int msg_nob; -#if !IBNAL_USE_FMR - int i; - int n; -#endif - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - /* Future protocol version compatibility support! - * If the viblnd-specific protocol changes, or when LNET unifies - * protocols over all LNDs, the initial connection will negotiate a - * protocol version. If I find this, I avoid any console errors. If - * my is doing connection establishment, the reject will tell the peer - * which version I'm running. 
*/ - - if (msg->ibm_magic == IBNAL_MSG_MAGIC) { - flip = 0; - } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) { - flip = 1; - } else { - if (msg->ibm_magic == LNET_PROTO_MAGIC || - msg->ibm_magic == __swab32(LNET_PROTO_MAGIC)) - return -EPROTO; - - /* Completely out to lunch */ - CERROR("Bad magic: %08x\n", msg->ibm_magic); - return -EPROTO; - } - - msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version; - if (expected_version == 0) { - if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD && - msg_version != IBNAL_MSG_VERSION) - return -EPROTO; - } else if (msg_version != expected_version) { - CERROR("Bad version: %x(%x expected)\n", - msg_version, expected_version); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: %d\n", nob); - return -EPROTO; - } - - msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob; - if (msg_nob > nob) { - CERROR("Short message: got %d, wanted %d\n", nob, msg_nob); - return -EPROTO; - } - - /* checksum must be computed with ibm_cksum zero and BEFORE anything - * gets flipped */ - msg_cksum = flip ? 
__swab32(msg->ibm_cksum) : msg->ibm_cksum; - msg->ibm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kibnal_cksum(msg, msg_nob)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - msg->ibm_cksum = msg_cksum; - - if (flip) { - /* leave magic unflipped as a clue to peer endianness */ - msg->ibm_version = msg_version; - CLASSERT (sizeof(msg->ibm_type) == 1); - CLASSERT (sizeof(msg->ibm_credits) == 1); - msg->ibm_nob = msg_nob; - __swab64s(&msg->ibm_srcnid); - __swab64s(&msg->ibm_srcstamp); - __swab64s(&msg->ibm_dstnid); - __swab64s(&msg->ibm_dststamp); - __swab64s(&msg->ibm_seq); - } - - if (msg->ibm_srcnid == LNET_NID_ANY) { - CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid)); - return -EPROTO; - } - - switch (msg->ibm_type) { - default: - CERROR("Unknown message type %x\n", msg->ibm_type); - return -EPROTO; - - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_IMMEDIATE: - if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) { - CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) { - CERROR("Short PUT_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putreq))); - return -EPROTO; - } - break; - - case IBNAL_MSG_PUT_ACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.putack))); - return -EPROTO; - } -#if IBNAL_USE_FMR - if (flip) { - __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag); - } - - n = msg->ibm_u.putack.ibpam_rd.rd_nfrag; - if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n", - n, 
IBNAL_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) { - CERROR("Short PUT_ACK: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) { - for (i = 0; i < n; i++) { - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_lo); - __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr_hi); - } - } -#endif - break; - - case IBNAL_MSG_GET_REQ: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.get))); - return -EPROTO; - } -#if IBNAL_USE_FMR - if (flip) { - __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - } -#else - if (flip) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag); - } - - n = msg->ibm_u.get.ibgm_rd.rd_nfrag; - if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n", - n, IBNAL_MAX_RDMA_FRAGS); - return -EPROTO; - } - - if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) { - CERROR("Short GET_REQ: %d(%d)\n", msg_nob, - (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])); - return -EPROTO; - } - - if (flip) - for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) { - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_lo); - __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr_hi); - } -#endif - break; - - case IBNAL_MSG_PUT_NAK: - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) { - CERROR("Short RDMA completion: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.completion))); - return -EPROTO; - } - if (flip) - __swab32s(&msg->ibm_u.completion.ibcm_status); - break; 
- - case IBNAL_MSG_CONNREQ: - case IBNAL_MSG_CONNACK: - if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) { - CERROR("Short connreq/ack: %d(%d)\n", msg_nob, - (int)(hdr_size + sizeof(msg->ibm_u.connparams))); - return -EPROTO; - } - if (flip) { - __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth); - __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size); - __swab32s(&msg->ibm_u.connparams.ibcp_max_frags); - } - break; - } - return 0; -} - -int -kibnal_start_listener (lnet_ni_t *ni) -{ - static cm_listen_data_t info; - - cm_return_t cmrc; - - LASSERT (kibnal_data.kib_listen_handle == NULL); - - kibnal_data.kib_listen_handle = - cm_create_cep(cm_cep_transp_rc); - if (kibnal_data.kib_listen_handle == NULL) { - CERROR ("Can't create listen CEP\n"); - return -ENOMEM; - } - - CDEBUG(D_NET, "Created CEP %p for listening\n", - kibnal_data.kib_listen_handle); - - memset(&info, 0, sizeof(info)); - info.listen_addr.end_pt.sid = - (__u64)(*kibnal_tunables.kib_service_number); - - cmrc = cm_listen(kibnal_data.kib_listen_handle, &info, - kibnal_listen_callback, NULL); - if (cmrc == cm_stat_success) - return 0; - - CERROR ("cm_listen error: %d\n", cmrc); - - cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle); - LASSERT (cmrc == cm_stat_success); - - kibnal_data.kib_listen_handle = NULL; - return -EINVAL; -} - -void -kibnal_stop_listener(lnet_ni_t *ni) -{ - cm_return_t cmrc; - - LASSERT (kibnal_data.kib_listen_handle != NULL); - - cmrc = cm_cancel(kibnal_data.kib_listen_handle); - if (cmrc != cm_stat_success) - CERROR ("Error %d stopping listener\n", cmrc); - - cfs_pause(cfs_time_seconds(1)/10); /* ensure no more callbacks */ - - cmrc = cm_destroy_cep(kibnal_data.kib_listen_handle); - if (cmrc != vv_return_ok) - CERROR ("Error %d destroying CEP\n", cmrc); - - kibnal_data.kib_listen_handle = NULL; -} - -int -kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid) -{ - kib_peer_t *peer; - unsigned long flags; - int rc; - - LASSERT (nid != LNET_NID_ANY); - - LIBCFS_ALLOC(peer, 
sizeof (*peer)); - if (peer == NULL) { - CERROR("Cannot allocate peer\n"); - return -ENOMEM; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - peer->ibp_nid = nid; - atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */ - - INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */ - INIT_LIST_HEAD (&peer->ibp_conns); - INIT_LIST_HEAD (&peer->ibp_tx_queue); - - peer->ibp_error = 0; - peer->ibp_last_alive = cfs_time_current(); - peer->ibp_reconnect_interval = 0; /* OK to connect at any time */ - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (atomic_read(&kibnal_data.kib_npeers) >= - *kibnal_tunables.kib_concurrent_peers) { - rc = -EOVERFLOW; /* !! but at least it distinguishes */ - } else if (kibnal_data.kib_listen_handle == NULL) { - rc = -ESHUTDOWN; /* shutdown has started */ - } else { - rc = 0; - /* npeers only grows with the global lock held */ - atomic_inc(&kibnal_data.kib_npeers); - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (rc != 0) { - CERROR("Can't create peer: %s\n", - (rc == -ESHUTDOWN) ? "shutting down" : - "too many peers"); - LIBCFS_FREE(peer, sizeof(*peer)); - } else { - *peerp = peer; - } - - return rc; -} - -void -kibnal_destroy_peer (kib_peer_t *peer) -{ - LASSERT (atomic_read (&peer->ibp_refcount) == 0); - LASSERT (peer->ibp_persistence == 0); - LASSERT (!kibnal_peer_active(peer)); - LASSERT (peer->ibp_connecting == 0); - LASSERT (peer->ibp_accepting == 0); - LASSERT (list_empty (&peer->ibp_conns)); - LASSERT (list_empty (&peer->ibp_tx_queue)); - - LIBCFS_FREE (peer, sizeof (*peer)); - - /* NB a peer's connections keep a reference on their peer until - * they are destroyed, so we can be assured that _all_ state to do - * with this peer has been cleaned up when its refcount drops to - * zero. 
*/ - atomic_dec(&kibnal_data.kib_npeers); -} - -kib_peer_t * -kibnal_find_peer_locked (lnet_nid_t nid) -{ - /* the caller is responsible for accounting the additional reference - * that this creates */ - struct list_head *peer_list = kibnal_nid2peerlist (nid); - struct list_head *tmp; - kib_peer_t *peer; - - list_for_each (tmp, peer_list) { - - peer = list_entry (tmp, kib_peer_t, ibp_list); - - LASSERT (peer->ibp_persistence != 0 || /* persistent peer */ - peer->ibp_connecting != 0 || /* creating conns */ - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); /* active conn */ - - if (peer->ibp_nid != nid) - continue; - - CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n", - peer, libcfs_nid2str(nid), - atomic_read (&peer->ibp_refcount)); - return (peer); - } - return (NULL); -} - -void -kibnal_unlink_peer_locked (kib_peer_t *peer) -{ - LASSERT (peer->ibp_persistence == 0); - LASSERT (list_empty(&peer->ibp_conns)); - - LASSERT (kibnal_peer_active(peer)); - list_del_init (&peer->ibp_list); - /* lose peerlist's ref */ - kibnal_peer_decref(peer); -} - -int -kibnal_get_peer_info (int index, lnet_nid_t *nidp, __u32 *ipp, - int *persistencep) -{ - kib_peer_t *peer; - struct list_head *ptmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (index-- > 0) - continue; - - *nidp = peer->ibp_nid; - *ipp = peer->ibp_ip; - *persistencep = peer->ibp_persistence; - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (0); - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (-ENOENT); -} - -int -kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip) -{ - kib_peer_t *peer; - kib_peer_t 
*peer2; - unsigned long flags; - int rc; - - CDEBUG(D_NET, "%s at %u.%u.%u.%u\n", - libcfs_nid2str(nid), HIPQUAD(ip)); - - if (nid == LNET_NID_ANY) - return (-EINVAL); - - rc = kibnal_create_peer(&peer, nid); - if (rc != 0) - return rc; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - peer2 = kibnal_find_peer_locked (nid); - if (peer2 != NULL) { - kibnal_peer_decref (peer); - peer = peer2; - } else { - /* peer table takes existing ref on peer */ - list_add_tail (&peer->ibp_list, - kibnal_nid2peerlist (nid)); - } - - peer->ibp_ip = ip; - peer->ibp_persistence++; - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return (0); -} - -void -kibnal_del_peer_locked (kib_peer_t *peer) -{ - struct list_head *ctmp; - struct list_head *cnxt; - kib_conn_t *conn; - - peer->ibp_persistence = 0; - - if (list_empty(&peer->ibp_conns)) { - kibnal_unlink_peer_locked(peer); - } else { - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry(ctmp, kib_conn_t, ibc_list); - - kibnal_close_conn_locked (conn, 0); - } - /* NB peer is no longer persistent; closing its last conn - * unlinked it. */ - } - /* NB peer now unlinked; might even be freed if the peer table had the - * last ref on it. 
*/ -} - -int -kibnal_del_peer (lnet_nid_t nid) -{ - CFS_LIST_HEAD (zombies); - struct list_head *ptmp; - struct list_head *pnxt; - kib_peer_t *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid)) - continue; - - if (!list_empty(&peer->ibp_tx_queue)) { - LASSERT (list_empty(&peer->ibp_conns)); - - list_splice_init(&peer->ibp_tx_queue, &zombies); - } - - kibnal_del_peer_locked (peer); - rc = 0; /* matched something */ - } - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_txlist_done(&zombies, -EIO); - - return (rc); -} - -kib_conn_t * -kibnal_get_conn_by_idx (int index) -{ - kib_peer_t *peer; - struct list_head *ptmp; - kib_conn_t *conn; - struct list_head *ctmp; - int i; - unsigned long flags; - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - list_for_each (ptmp, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence > 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - list_for_each (ctmp, &peer->ibp_conns) { - if (index-- > 0) - continue; - - conn = list_entry (ctmp, kib_conn_t, ibc_list); - kibnal_conn_addref(conn); - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return (conn); - } - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return 
(NULL); -} - -void -kibnal_debug_rx (kib_rx_t *rx) -{ - CDEBUG(D_CONSOLE, " %p nob %d msg_type %x " - "cred %d seq "LPD64"\n", - rx, rx->rx_nob, rx->rx_msg->ibm_type, - rx->rx_msg->ibm_credits, rx->rx_msg->ibm_seq); -} - -void -kibnal_debug_tx (kib_tx_t *tx) -{ - CDEBUG(D_CONSOLE, " %p snd %d q %d w %d rc %d dl %lx " - "cookie "LPX64" msg %s%s type %x cred %d seq "LPD64"\n", - tx, tx->tx_sending, tx->tx_queued, tx->tx_waiting, - tx->tx_status, tx->tx_deadline, tx->tx_cookie, - tx->tx_lntmsg[0] == NULL ? "-" : "!", - tx->tx_lntmsg[1] == NULL ? "-" : "!", - tx->tx_msg->ibm_type, tx->tx_msg->ibm_credits, - tx->tx_msg->ibm_seq); -} - -void -kibnal_debug_conn (kib_conn_t *conn) -{ - struct list_head *tmp; - int i; - - spin_lock(&conn->ibc_lock); - - CDEBUG(D_CONSOLE, "conn[%d] %p -> %s: \n", - atomic_read(&conn->ibc_refcount), conn, - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - CDEBUG(D_CONSOLE, " txseq "LPD64" rxseq "LPD64" state %d \n", - conn->ibc_txseq, conn->ibc_rxseq, conn->ibc_state); - CDEBUG(D_CONSOLE, " nposted %d cred %d o_cred %d r_cred %d\n", - conn->ibc_nsends_posted, conn->ibc_credits, - conn->ibc_outstanding_credits, conn->ibc_reserved_credits); - CDEBUG(D_CONSOLE, " disc %d comms_err %d\n", - conn->ibc_disconnect, conn->ibc_comms_error); - - CDEBUG(D_CONSOLE, " early_rxs:\n"); - list_for_each(tmp, &conn->ibc_early_rxs) - kibnal_debug_rx(list_entry(tmp, kib_rx_t, rx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_nocred:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_nocred) - kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue_rsrvd:\n"); - list_for_each(tmp, &conn->ibc_tx_queue_rsrvd) - kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " tx_queue:\n"); - list_for_each(tmp, &conn->ibc_tx_queue) - kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - CDEBUG(D_CONSOLE, " active_txs:\n"); - list_for_each(tmp, &conn->ibc_active_txs) - kibnal_debug_tx(list_entry(tmp, kib_tx_t, tx_list)); - - 
CDEBUG(D_CONSOLE, " rxs:\n"); - for (i = 0; i < IBNAL_RX_MSGS; i++) - kibnal_debug_rx(&conn->ibc_rxs[i]); - - spin_unlock(&conn->ibc_lock); -} - -int -kibnal_set_qp_state (kib_conn_t *conn, vv_qp_state_t new_state) -{ - static vv_qp_attr_t attr; - - kib_connvars_t *cv = conn->ibc_connvars; - vv_return_t vvrc; - - /* Only called by connd => static OK */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - memset(&attr, 0, sizeof(attr)); - - switch (new_state) { - default: - LBUG(); - - case vv_qp_state_init: { - struct vv_qp_modify_init_st *init = &attr.modify.params.init; - - init->p_key_indx = cv->cv_pkey_index; - init->phy_port_num = cv->cv_port; - init->q_key = IBNAL_QKEY; /* XXX but VV_QP_AT_Q_KEY not set! */ - init->access_control = vv_acc_r_mem_read | - vv_acc_r_mem_write; /* XXX vv_acc_l_mem_write ? */ - - attr.modify.vv_qp_attr_mask = VV_QP_AT_P_KEY_IX | - VV_QP_AT_PHY_PORT_NUM | - VV_QP_AT_ACCESS_CON_F; - break; - } - case vv_qp_state_rtr: { - struct vv_qp_modify_rtr_st *rtr = &attr.modify.params.rtr; - vv_add_vec_t *av = &rtr->remote_add_vec; - - av->dlid = cv->cv_path.dlid; - av->grh_flag = (!IBNAL_LOCAL_SUB); - av->max_static_rate = IBNAL_R_2_STATIC_RATE(cv->cv_path.rate); - av->service_level = cv->cv_path.sl; - av->source_path_bit = IBNAL_SOURCE_PATH_BIT; - av->pmtu = cv->cv_path.mtu; - av->rnr_retry_count = cv->cv_rnr_count; - av->global_dest.traffic_class = cv->cv_path.traffic_class; - av->global_dest.hope_limit = cv->cv_path.hop_limut; - av->global_dest.flow_lable = cv->cv_path.flow_label; - av->global_dest.s_gid_index = cv->cv_sgid_index; - // XXX other av fields zero? 
- - rtr->destanation_qp = cv->cv_remote_qpn; - rtr->receive_psn = cv->cv_rxpsn; - rtr->responder_rdma_r_atom_num = IBNAL_OUS_DST_RD; - rtr->opt_min_rnr_nak_timer = *kibnal_tunables.kib_rnr_nak_timer; - - - // XXX sdp sets VV_QP_AT_OP_F but no actual optional options - attr.modify.vv_qp_attr_mask = VV_QP_AT_ADD_VEC | - VV_QP_AT_DEST_QP | - VV_QP_AT_R_PSN | - VV_QP_AT_MIN_RNR_NAK_T | - VV_QP_AT_RESP_RDMA_ATOM_OUT_NUM | - VV_QP_AT_OP_F; - break; - } - case vv_qp_state_rts: { - struct vv_qp_modify_rts_st *rts = &attr.modify.params.rts; - - rts->send_psn = cv->cv_txpsn; - rts->local_ack_timeout = *kibnal_tunables.kib_local_ack_timeout; - rts->retry_num = *kibnal_tunables.kib_retry_cnt; - rts->rnr_num = *kibnal_tunables.kib_rnr_cnt; - rts->dest_out_rdma_r_atom_num = IBNAL_OUS_DST_RD; - - attr.modify.vv_qp_attr_mask = VV_QP_AT_S_PSN | - VV_QP_AT_L_ACK_T | - VV_QP_AT_RETRY_NUM | - VV_QP_AT_RNR_NUM | - VV_QP_AT_DEST_RDMA_ATOM_OUT_NUM; - break; - } - case vv_qp_state_error: - case vv_qp_state_reset: - attr.modify.vv_qp_attr_mask = 0; - break; - } - - attr.modify.qp_modify_into_state = new_state; - attr.modify.vv_qp_attr_mask |= VV_QP_AT_STATE; - - vvrc = vv_qp_modify(kibnal_data.kib_hca, conn->ibc_qp, &attr, NULL); - if (vvrc != vv_return_ok) { - CERROR("Can't modify qp -> %s state to %d: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - new_state, vvrc); - return -EIO; - } - - return 0; -} - -kib_conn_t * -kibnal_create_conn (cm_cep_handle_t cep) -{ - kib_conn_t *conn; - int i; - int page_offset; - int ipage; - vv_return_t vvrc; - int rc; - - static vv_qp_attr_t reqattr; - static vv_qp_attr_t rspattr; - - /* Only the connd creates conns => single threaded */ - LASSERT(!in_interrupt()); - LASSERT(current == kibnal_data.kib_connd); - - LIBCFS_ALLOC(conn, sizeof (*conn)); - if (conn == NULL) { - CERROR ("Can't allocate connection\n"); - return (NULL); - } - - /* zero flags, NULL pointers etc... 
*/ - memset (conn, 0, sizeof (*conn)); - - conn->ibc_version = IBNAL_MSG_VERSION; /* Use latest version at first */ - - INIT_LIST_HEAD (&conn->ibc_early_rxs); - INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred); - INIT_LIST_HEAD (&conn->ibc_tx_queue); - INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd); - INIT_LIST_HEAD (&conn->ibc_active_txs); - spin_lock_init (&conn->ibc_lock); - - atomic_inc (&kibnal_data.kib_nconns); - /* well not really, but I call destroy() on failure, which decrements */ - - conn->ibc_cep = cep; - - LIBCFS_ALLOC(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - if (conn->ibc_connvars == NULL) { - CERROR("Can't allocate in-progress connection state\n"); - goto failed; - } - memset (conn->ibc_connvars, 0, sizeof(*conn->ibc_connvars)); - /* Random seed for QP sequence number */ - get_random_bytes(&conn->ibc_connvars->cv_rxpsn, - sizeof(conn->ibc_connvars->cv_rxpsn)); - - LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t)); - if (conn->ibc_rxs == NULL) { - CERROR("Cannot allocate RX buffers\n"); - goto failed; - } - memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES, 1); - if (rc != 0) - goto failed; - - for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) { - struct page *page = conn->ibc_rx_pages->ibp_pages[ipage]; - kib_rx_t *rx = &conn->ibc_rxs[i]; - vv_mem_reg_h_t mem_h; - vv_r_key_t r_key; - - rx->rx_conn = conn; - rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca, - rx->rx_msg, - IBNAL_MSG_SIZE, - &mem_h, - &rx->rx_lkey, - &r_key); - LASSERT (vvrc == vv_return_ok); - - CDEBUG(D_NET, "Rx[%d] %p->%p[%x]\n", i, rx, - rx->rx_msg, rx->rx_lkey); - - page_offset += IBNAL_MSG_SIZE; - LASSERT (page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_RX_MSG_PAGES); - } - } - - memset(&reqattr, 0, sizeof(reqattr)); - - 
reqattr.create.qp_type = vv_qp_type_r_conn; - reqattr.create.cq_send_h = kibnal_data.kib_cq; - reqattr.create.cq_receive_h = kibnal_data.kib_cq; - reqattr.create.send_max_outstand_wr = (1 + IBNAL_MAX_RDMA_FRAGS) * - (*kibnal_tunables.kib_concurrent_sends); - reqattr.create.receive_max_outstand_wr = IBNAL_RX_MSGS; - reqattr.create.max_scatgat_per_send_wr = 1; - reqattr.create.max_scatgat_per_receive_wr = 1; - reqattr.create.signaling_type = vv_selectable_signaling; - reqattr.create.pd_h = kibnal_data.kib_pd; - reqattr.create.recv_solicited_events = vv_selectable_signaling; // vv_signal_all; - - vvrc = vv_qp_create(kibnal_data.kib_hca, &reqattr, NULL, - &conn->ibc_qp, &rspattr); - if (vvrc != vv_return_ok) { - CERROR ("Failed to create queue pair: %d\n", vvrc); - goto failed; - } - - /* Mark QP created */ - conn->ibc_state = IBNAL_CONN_INIT_QP; - conn->ibc_connvars->cv_local_qpn = rspattr.create_return.qp_num; - - if (rspattr.create_return.receive_max_outstand_wr < - IBNAL_RX_MSGS || - rspattr.create_return.send_max_outstand_wr < - (1 + IBNAL_MAX_RDMA_FRAGS) * (*kibnal_tunables.kib_concurrent_sends)) { - CERROR("Insufficient rx/tx work items: wanted %d/%d got %d/%d\n", - IBNAL_RX_MSGS, - (1 + IBNAL_MAX_RDMA_FRAGS) * - (*kibnal_tunables.kib_concurrent_sends), - rspattr.create_return.receive_max_outstand_wr, - rspattr.create_return.send_max_outstand_wr); - goto failed; - } - - /* Mark init complete */ - conn->ibc_state = IBNAL_CONN_INIT; - - /* 1 ref for caller */ - atomic_set (&conn->ibc_refcount, 1); - return (conn); - - failed: - kibnal_destroy_conn (conn); - return (NULL); -} - -void -kibnal_destroy_conn (kib_conn_t *conn) -{ - vv_return_t vvrc; - - /* Only the connd does this (i.e. 
single threaded) */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - CDEBUG (D_NET, "connection %p\n", conn); - - LASSERT (atomic_read (&conn->ibc_refcount) == 0); - LASSERT (list_empty(&conn->ibc_early_rxs)); - LASSERT (list_empty(&conn->ibc_tx_queue)); - LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd)); - LASSERT (list_empty(&conn->ibc_tx_queue_nocred)); - LASSERT (list_empty(&conn->ibc_active_txs)); - LASSERT (conn->ibc_nsends_posted == 0); - - switch (conn->ibc_state) { - default: - /* conn must be completely disengaged from the network */ - LBUG(); - - case IBNAL_CONN_DISCONNECTED: - /* connvars should have been freed already */ - LASSERT (conn->ibc_connvars == NULL); - /* fall through */ - - case IBNAL_CONN_INIT: - vvrc = cm_destroy_cep(conn->ibc_cep); - LASSERT (vvrc == vv_return_ok); - /* fall through */ - - case IBNAL_CONN_INIT_QP: - kibnal_set_qp_state(conn, vv_qp_state_reset); - vvrc = vv_qp_destroy(kibnal_data.kib_hca, conn->ibc_qp); - if (vvrc != vv_return_ok) - CERROR("Can't destroy QP: %d\n", vvrc); - /* fall through */ - - case IBNAL_CONN_INIT_NOTHING: - break; - } - - if (conn->ibc_rx_pages != NULL) - kibnal_free_pages(conn->ibc_rx_pages); - - if (conn->ibc_rxs != NULL) - LIBCFS_FREE(conn->ibc_rxs, - IBNAL_RX_MSGS * sizeof(kib_rx_t)); - - if (conn->ibc_connvars != NULL) - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - - if (conn->ibc_peer != NULL) - kibnal_peer_decref(conn->ibc_peer); - - LIBCFS_FREE(conn, sizeof (*conn)); - - atomic_dec(&kibnal_data.kib_nconns); -} - -int -kibnal_close_peer_conns_locked (kib_peer_t *peer, int why) -{ - kib_conn_t *conn; - struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - count++; - kibnal_close_conn_locked (conn, why); - } - - return (count); -} - -int -kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation) -{ - kib_conn_t *conn; 
- struct list_head *ctmp; - struct list_head *cnxt; - int count = 0; - - list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - if (conn->ibc_incarnation == incarnation) - continue; - - CDEBUG(D_NET, "Closing stale conn -> %s incarnation:"LPX64"("LPX64")\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_incarnation, incarnation); - - count++; - kibnal_close_conn_locked (conn, -ESTALE); - } - - return (count); -} - -int -kibnal_close_matching_conns (lnet_nid_t nid) -{ - kib_peer_t *peer; - struct list_head *ptmp; - struct list_head *pnxt; - int lo; - int hi; - int i; - unsigned long flags; - int count = 0; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (nid != LNET_NID_ANY) - lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers; - else { - lo = 0; - hi = kibnal_data.kib_peer_hash_size - 1; - } - - for (i = lo; i <= hi; i++) { - list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) { - - peer = list_entry (ptmp, kib_peer_t, ibp_list); - LASSERT (peer->ibp_persistence != 0 || - peer->ibp_connecting != 0 || - peer->ibp_accepting != 0 || - !list_empty (&peer->ibp_conns)); - - if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid)) - continue; - - count += kibnal_close_peer_conns_locked (peer, 0); - } - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - /* wildcards always succeed */ - if (nid == LNET_NID_ANY) - return (0); - - return (count == 0 ? 
-ENOENT : 0); -} - -int -kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - LASSERT (ni == kibnal_data.kib_ni); - - switch(cmd) { - case IOC_LIBCFS_GET_PEER: { - lnet_nid_t nid = 0; - __u32 ip = 0; - int share_count = 0; - - rc = kibnal_get_peer_info(data->ioc_count, - &nid, &ip, &share_count); - data->ioc_nid = nid; - data->ioc_count = share_count; - data->ioc_u32[0] = ip; - data->ioc_u32[1] = *kibnal_tunables.kib_service_number; /* port */ - break; - } - case IOC_LIBCFS_ADD_PEER: { - rc = kibnal_add_persistent_peer (data->ioc_nid, - data->ioc_u32[0]); /* IP */ - break; - } - case IOC_LIBCFS_DEL_PEER: { - rc = kibnal_del_peer (data->ioc_nid); - break; - } - case IOC_LIBCFS_GET_CONN: { - kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count); - - if (conn == NULL) - rc = -ENOENT; - else { - // kibnal_debug_conn(conn); - rc = 0; - data->ioc_nid = conn->ibc_peer->ibp_nid; - kibnal_conn_decref(conn); - } - break; - } - case IOC_LIBCFS_CLOSE_CONNECTION: { - rc = kibnal_close_matching_conns (data->ioc_nid); - break; - } - case IOC_LIBCFS_REGISTER_MYNID: { - if (ni->ni_nid == data->ioc_nid) { - rc = 0; - } else { - CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n", - libcfs_nid2str(data->ioc_nid), - libcfs_nid2str(ni->ni_nid)); - rc = -EINVAL; - } - break; - } - } - - return rc; -} - -void -kibnal_free_pages (kib_pages_t *p) -{ - int npages = p->ibp_npages; - int i; - - for (i = 0; i < npages; i++) - if (p->ibp_pages[i] != NULL) - __free_page(p->ibp_pages[i]); - - LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages])); -} - -int -kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write) -{ - kib_pages_t *p; - int i; - - LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages])); - if (p == NULL) { - CERROR ("Can't allocate buffer %d\n", npages); - return (-ENOMEM); - } - - memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages])); - p->ibp_npages = npages; - - for (i = 0; i < npages; 
i++) { - p->ibp_pages[i] = alloc_page (GFP_KERNEL); - if (p->ibp_pages[i] == NULL) { - CERROR ("Can't allocate page %d of %d\n", i, npages); - kibnal_free_pages(p); - return (-ENOMEM); - } - } - - *pp = p; - return (0); -} - -int -kibnal_alloc_tx_descs (void) -{ - int i; - - LIBCFS_ALLOC (kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - if (kibnal_data.kib_tx_descs == NULL) - return -ENOMEM; - - memset(kibnal_data.kib_tx_descs, 0, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - kib_tx_t *tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); - if (tx->tx_pages == NULL) - return -ENOMEM; -#else - LIBCFS_ALLOC(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - if (tx->tx_wrq == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_gl)); - if (tx->tx_gl == NULL) - return -ENOMEM; - - LIBCFS_ALLOC(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBNAL_MAX_RDMA_FRAGS])); - if (tx->tx_rd == NULL) - return -ENOMEM; -#endif - } - - return 0; -} - -void -kibnal_free_tx_descs (void) -{ - int i; - - if (kibnal_data.kib_tx_descs == NULL) - return; - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - kib_tx_t *tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - if (tx->tx_pages != NULL) - LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV * - sizeof(*tx->tx_pages)); -#else - if (tx->tx_wrq != NULL) - LIBCFS_FREE(tx->tx_wrq, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_wrq)); - - if (tx->tx_gl != NULL) - LIBCFS_FREE(tx->tx_gl, - (1 + IBNAL_MAX_RDMA_FRAGS) * - sizeof(*tx->tx_gl)); - - if (tx->tx_rd != NULL) - LIBCFS_FREE(tx->tx_rd, - offsetof(kib_rdma_desc_t, - rd_frags[IBNAL_MAX_RDMA_FRAGS])); -#endif - } - - LIBCFS_FREE(kibnal_data.kib_tx_descs, - IBNAL_TX_MSGS() * sizeof(kib_tx_t)); -} - -#if IBNAL_USE_FMR -void -kibnal_free_fmrs (int n) -{ - int i; - vv_return_t vvrc; - kib_tx_t *tx; - - for (i = 
0; i < n; i++) { - tx = &kibnal_data.kib_tx_descs[i]; - - vvrc = vv_free_fmr(kibnal_data.kib_hca, - tx->tx_md.md_fmrhandle); - if (vvrc != vv_return_ok) - CWARN("vv_free_fmr[%d]: %d\n", i, vvrc); - } -} -#endif - -int -kibnal_setup_tx_descs (void) -{ - int ipage = 0; - int page_offset = 0; - struct page *page; - kib_tx_t *tx; - vv_mem_reg_h_t mem_h; - vv_r_key_t rkey; - vv_return_t vvrc; - int i; - int rc; -#if IBNAL_USE_FMR - vv_fmr_t fmr_props; -#endif - - /* pre-mapped messages are not bigger than 1 page */ - CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE); - - /* No fancy arithmetic when we do the buffer calculations */ - CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0); - - rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages, - IBNAL_TX_MSG_PAGES(), 0); - if (rc != 0) - return (rc); - - for (i = 0; i < IBNAL_TX_MSGS(); i++) { - page = kibnal_data.kib_tx_pages->ibp_pages[ipage]; - tx = &kibnal_data.kib_tx_descs[i]; - -#if IBNAL_USE_FMR - memset(&fmr_props, 0, sizeof(fmr_props)); - fmr_props.pd_hndl = kibnal_data.kib_pd; - fmr_props.acl = (vv_acc_r_mem_write | - vv_acc_l_mem_write); - fmr_props.max_pages = LNET_MAX_IOV; - fmr_props.log2_page_sz = PAGE_SHIFT; - fmr_props.max_outstanding_maps = *kibnal_tunables.kib_fmr_remaps; - - vvrc = vv_alloc_fmr(kibnal_data.kib_hca, - &fmr_props, - &tx->tx_md.md_fmrhandle); - if (vvrc != vv_return_ok) { - CERROR("Can't allocate fmr %d: %d\n", i, vvrc); - - kibnal_free_fmrs(i); - kibnal_free_pages (kibnal_data.kib_tx_pages); - return -ENOMEM; - } - - tx->tx_md.md_fmrcount = *kibnal_tunables.kib_fmr_remaps; - tx->tx_md.md_active = 0; -#endif - tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + - page_offset); - - vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca, - tx->tx_msg, - IBNAL_MSG_SIZE, - &mem_h, - &tx->tx_lkey, - &rkey); - LASSERT (vvrc == vv_return_ok); - - CDEBUG(D_NET, "Tx[%d] %p->%p[%x]\n", i, tx, - tx->tx_msg, tx->tx_lkey); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - page_offset += IBNAL_MSG_SIZE; - LASSERT 
(page_offset <= PAGE_SIZE); - - if (page_offset == PAGE_SIZE) { - page_offset = 0; - ipage++; - LASSERT (ipage <= IBNAL_TX_MSG_PAGES()); - } - } - - return (0); -} - -void -kibnal_shutdown (lnet_ni_t *ni) -{ - int i; - vv_return_t vvrc; - - LASSERT (ni == kibnal_data.kib_ni); - LASSERT (ni->ni_data == &kibnal_data); - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - switch (kibnal_data.kib_init) { - - case IBNAL_INIT_ALL: - /* stop accepting connections and prevent new peers */ - kibnal_stop_listener(ni); - - /* nuke all existing peers */ - kibnal_del_peer(LNET_NID_ANY); - - /* Wait for all peer state to clean up */ - i = 2; - while (atomic_read(&kibnal_data.kib_npeers) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* 2**n? */ - "waiting for %d peers to disconnect\n", - atomic_read(&kibnal_data.kib_npeers)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case IBNAL_INIT_CQ: - vvrc = vv_cq_destroy(kibnal_data.kib_hca, kibnal_data.kib_cq); - if (vvrc != vv_return_ok) - CERROR ("Destroy CQ error: %d\n", vvrc); - /* fall through */ - - case IBNAL_INIT_TXD: - kibnal_free_pages (kibnal_data.kib_tx_pages); -#if IBNAL_USE_FMR - kibnal_free_fmrs(IBNAL_TX_MSGS()); -#endif - /* fall through */ - - case IBNAL_INIT_PD: -#if 0 - /* Only deallocate a PD if we actually allocated one */ - vvrc = vv_pd_deallocate(kibnal_data.kib_hca, - kibnal_data.kib_pd); - if (vvrc != vv_return_ok) - CERROR ("Destroy PD error: %d\n", vvrc); -#endif - /* fall through */ - - case IBNAL_INIT_ASYNC: - vvrc = vv_dell_async_event_cb (kibnal_data.kib_hca, - kibnal_async_callback); - if (vvrc != vv_return_ok) - CERROR("vv_dell_async_event_cb error: %d\n", vvrc); - - /* fall through */ - - case IBNAL_INIT_HCA: - vvrc = vv_hca_close(kibnal_data.kib_hca); - if (vvrc != vv_return_ok) - CERROR ("Close HCA error: %d\n", vvrc); - /* fall through */ - - case IBNAL_INIT_DATA: - LASSERT (atomic_read(&kibnal_data.kib_npeers) == 0); - LASSERT 
(kibnal_data.kib_peers != NULL); - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) { - LASSERT (list_empty (&kibnal_data.kib_peers[i])); - } - LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0); - LASSERT (list_empty (&kibnal_data.kib_connd_zombies)); - LASSERT (list_empty (&kibnal_data.kib_connd_conns)); - LASSERT (list_empty (&kibnal_data.kib_connd_pcreqs)); - LASSERT (list_empty (&kibnal_data.kib_connd_peers)); - - /* flag threads to terminate; wake and wait for them to die */ - kibnal_data.kib_shutdown = 1; - wake_up_all (&kibnal_data.kib_sched_waitq); - wake_up_all (&kibnal_data.kib_connd_waitq); - - i = 2; - while (atomic_read (&kibnal_data.kib_nthreads) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "Waiting for %d threads to terminate\n", - atomic_read (&kibnal_data.kib_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - /* fall through */ - - case IBNAL_INIT_NOTHING: - break; - } - - kibnal_free_tx_descs(); - - if (kibnal_data.kib_peers != NULL) - LIBCFS_FREE (kibnal_data.kib_peers, - sizeof (struct list_head) * - kibnal_data.kib_peer_hash_size); - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&libcfs_kmemory)); - - kibnal_data.kib_init = IBNAL_INIT_NOTHING; - PORTAL_MODULE_UNUSE; -} - -int -kibnal_startup (lnet_ni_t *ni) -{ - char scratch[32]; - char ipif_name[32]; - char *hca_name; - __u32 ip; - __u32 netmask; - int up; - int nob; - int devno; - struct timeval tv; - int rc; - int i; - vv_request_event_record_t req_er; - vv_return_t vvrc; - - LASSERT (ni->ni_lnd == &the_kiblnd); - - /* Only 1 instance supported */ - if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) { - CERROR ("Only 1 instance supported\n"); - return -EPERM; - } - - if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) { - CERROR ("Can't set credits(%d) > ntx(%d)\n", - *kibnal_tunables.kib_credits, - *kibnal_tunables.kib_ntx); - return -EINVAL; - } - - ni->ni_maxtxcredits = *kibnal_tunables.kib_credits; - 
ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits; - - CLASSERT (LNET_MAX_INTERFACES > 1); - - if (ni->ni_interfaces[0] != NULL) { - /* Use the HCA specified in 'networks=' */ - - if (ni->ni_interfaces[1] != NULL) { - CERROR("Multiple interfaces not supported\n"); - return -EPERM; - } - - /* Parse <hca base name><number> */ - hca_name = ni->ni_interfaces[0]; - nob = strlen(*kibnal_tunables.kib_hca_basename); - - if (strncmp(hca_name, *kibnal_tunables.kib_hca_basename, nob) || - sscanf(hca_name + nob, "%d%n", &devno, &nob) < 1) { - CERROR("Unrecognised HCA %s\n", hca_name); - return -EINVAL; - } - - } else { - /* Use <hca base name>0 */ - devno = 0; - - hca_name = scratch; - snprintf(hca_name, sizeof(scratch), "%s%d", - *kibnal_tunables.kib_hca_basename, devno); - if (strlen(hca_name) == sizeof(scratch) - 1) { - CERROR("HCA name %s truncated\n", hca_name); - return -EINVAL; - } - } - - /* Find IP address from <ipif base name><hca number> */ - snprintf(ipif_name, sizeof(ipif_name), "%s%d", - *kibnal_tunables.kib_ipif_basename, devno); - if (strlen(ipif_name) == sizeof(ipif_name - 1)) { - CERROR("IPoIB interface name %s truncated\n", ipif_name); - return -EINVAL; - } - - rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask); - if (rc != 0) { - CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc); - return -ENETDOWN; - } - - if (!up) { - CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name); - return -ENETDOWN; - } - - ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip); - - PORTAL_MODULE_USE; - memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */ - - kibnal_data.kib_ni = ni; - ni->ni_data = &kibnal_data; - - do_gettimeofday(&tv); - kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - rwlock_init(&kibnal_data.kib_global_lock); - - kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE; - LIBCFS_ALLOC (kibnal_data.kib_peers, - sizeof (struct list_head) * kibnal_data.kib_peer_hash_size); - if 
(kibnal_data.kib_peers == NULL) { - goto failed; - } - for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) - INIT_LIST_HEAD(&kibnal_data.kib_peers[i]); - - spin_lock_init (&kibnal_data.kib_connd_lock); - INIT_LIST_HEAD (&kibnal_data.kib_connd_peers); - INIT_LIST_HEAD (&kibnal_data.kib_connd_pcreqs); - INIT_LIST_HEAD (&kibnal_data.kib_connd_conns); - INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies); - init_waitqueue_head (&kibnal_data.kib_connd_waitq); - - spin_lock_init (&kibnal_data.kib_sched_lock); - init_waitqueue_head (&kibnal_data.kib_sched_waitq); - - spin_lock_init (&kibnal_data.kib_tx_lock); - INIT_LIST_HEAD (&kibnal_data.kib_idle_txs); - - rc = kibnal_alloc_tx_descs(); - if (rc != 0) { - CERROR("Can't allocate tx descs\n"); - goto failed; - } - - /* lists/ptrs/locks initialised */ - kibnal_data.kib_init = IBNAL_INIT_DATA; - /*****************************************************/ - - for (i = 0; i < IBNAL_N_SCHED; i++) { - rc = kibnal_thread_start (kibnal_scheduler, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn vibnal scheduler[%d]: %d\n", - i, rc); - goto failed; - } - } - - rc = kibnal_thread_start (kibnal_connd, NULL); - if (rc != 0) { - CERROR ("Can't spawn vibnal connd: %d\n", rc); - goto failed; - } - - vvrc = vv_hca_open(hca_name, NULL, &kibnal_data.kib_hca); - if (vvrc != vv_return_ok) { - CERROR ("Can't open HCA %s: %d\n", hca_name, vvrc); - goto failed; - } - - /* Channel Adapter opened */ - kibnal_data.kib_init = IBNAL_INIT_HCA; - - /* register to get HCA's asynchronous events. 
*/ - req_er.req_event_type = VV_ASYNC_EVENT_ALL_MASK; - vvrc = vv_set_async_event_cb (kibnal_data.kib_hca, req_er, - kibnal_async_callback); - if (vvrc != vv_return_ok) { - CERROR ("Can't set HCA %s callback: %d\n", hca_name, vvrc); - goto failed; - } - - kibnal_data.kib_init = IBNAL_INIT_ASYNC; - - /*****************************************************/ - - vvrc = vv_hca_query(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs); - if (vvrc != vv_return_ok) { - CERROR ("Can't size port attrs for %s: %d\n", hca_name, vvrc); - goto failed; - } - - kibnal_data.kib_port = -1; - - for (i = 0; i<kibnal_data.kib_hca_attrs.port_num; i++) { - - int port_num = i+1; - u_int32_t tbl_count; - vv_port_attrib_t *pattr = &kibnal_data.kib_port_attr; - - vvrc = vv_port_query(kibnal_data.kib_hca, port_num, pattr); - if (vvrc != vv_return_ok) { - CERROR("vv_port_query failed for %s port %d: %d\n", - hca_name, port_num, vvrc); - continue; - } - - switch (pattr->port_state) { - case vv_state_linkDoun: - CDEBUG(D_NET, "port[%d] Down\n", port_num); - continue; - case vv_state_linkInit: - CDEBUG(D_NET, "port[%d] Init\n", port_num); - continue; - case vv_state_linkArm: - CDEBUG(D_NET, "port[%d] Armed\n", port_num); - continue; - case vv_state_linkActive: - CDEBUG(D_NET, "port[%d] Active\n", port_num); - - /* Found a suitable port. Get its GUID and PKEY. */ - tbl_count = 1; - vvrc = vv_get_port_gid_tbl(kibnal_data.kib_hca, - port_num, &tbl_count, - &kibnal_data.kib_port_gid); - if (vvrc != vv_return_ok) { - CERROR("vv_get_port_gid_tbl failed " - "for %s port %d: %d\n", - hca_name, port_num, vvrc); - continue; - } - - tbl_count = 1; - vvrc = vv_get_port_partition_tbl(kibnal_data.kib_hca, - port_num, &tbl_count, - &kibnal_data.kib_port_pkey); - if (vvrc != vv_return_ok) { - CERROR("vv_get_port_partition_tbl failed " - "for %s port %d: %d\n", - hca_name, port_num, vvrc); - continue; - } - - kibnal_data.kib_port = port_num; - - break; - case vv_state_linkActDefer: /* TODO: correct? 
*/ - case vv_state_linkNoChange: - CERROR("Unexpected %s port[%d] state %d\n", - hca_name, i, pattr->port_state); - continue; - } - break; - } - - if (kibnal_data.kib_port == -1) { - CERROR ("Can't find an active port on %s\n", hca_name); - goto failed; - } - - CDEBUG(D_NET, "Using %s port %d - GID="LPX64":"LPX64"\n", - hca_name, kibnal_data.kib_port, - kibnal_data.kib_port_gid.scope.g.subnet, - kibnal_data.kib_port_gid.scope.g.eui64); - - /*****************************************************/ - -#if 1 - /* We use a pre-allocated PD */ - vvrc = vv_get_gen_pd_h(kibnal_data.kib_hca, &kibnal_data.kib_pd); -#else - vvrc = vv_pd_allocate(kibnal_data.kib_hca, &kibnal_data.kib_pd); -#endif - if (vvrc != vv_return_ok) { - CERROR ("Can't init PD: %d\n", vvrc); - goto failed; - } - - /* flag PD initialised */ - kibnal_data.kib_init = IBNAL_INIT_PD; - /*****************************************************/ - - rc = kibnal_setup_tx_descs(); - if (rc != 0) { - CERROR ("Can't register tx descs: %d\n", rc); - goto failed; - } - - /* flag TX descs initialised */ - kibnal_data.kib_init = IBNAL_INIT_TXD; - /*****************************************************/ - - { - uint32_t nentries; - - vvrc = vv_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(), - kibnal_cq_callback, - NULL, /* context */ - &kibnal_data.kib_cq, &nentries); - if (vvrc != 0) { - CERROR ("Can't create RX CQ: %d\n", vvrc); - goto failed; - } - - /* flag CQ initialised */ - kibnal_data.kib_init = IBNAL_INIT_CQ; - - if (nentries < IBNAL_CQ_ENTRIES()) { - CERROR ("CQ only has %d entries, need %d\n", - nentries, IBNAL_CQ_ENTRIES()); - goto failed; - } - - vvrc = vv_request_completion_notification(kibnal_data.kib_hca, - kibnal_data.kib_cq, - vv_next_solicit_unsolicit_event); - if (vvrc != 0) { - CERROR ("Failed to re-arm completion queue: %d\n", rc); - goto failed; - } - } - - rc = kibnal_start_listener(ni); - if (rc != 0) { - CERROR("Can't start listener: %d\n", rc); - goto failed; - } - - /* flag everything 
initialised */ - kibnal_data.kib_init = IBNAL_INIT_ALL; - /*****************************************************/ - - return (0); - - failed: - CDEBUG(D_NET, "kibnal_startup failed\n"); - kibnal_shutdown (ni); - return (-ENETDOWN); -} - -void __exit -kibnal_module_fini (void) -{ - lnet_unregister_lnd(&the_kiblnd); - kibnal_tunables_fini(); -} - -int __init -kibnal_module_init (void) -{ - int rc; - - vibnal_assert_wire_constants(); - - CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) - <= cm_REQ_priv_data_len); - CLASSERT (offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t) - <= cm_REP_priv_data_len); - CLASSERT (sizeof(kib_msg_t) <= IBNAL_MSG_SIZE); -#if !IBNAL_USE_FMR - CLASSERT (offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS]) - <= IBNAL_MSG_SIZE); - CLASSERT (offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[IBNAL_MAX_RDMA_FRAGS]) - <= IBNAL_MSG_SIZE); -#endif - rc = kibnal_tunables_init(); - if (rc != 0) - return rc; - - lnet_register_lnd(&the_kiblnd); - - return 0; -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel Voltaire IB LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kibnal_module_init); -module_exit(kibnal_module_fini); - diff --git a/lnet/klnds/viblnd/viblnd.h b/lnet/klnds/viblnd/viblnd.h deleted file mode 100644 index aae8d1ebd3e48df1997a8a3d9187135a9c1e1d8d..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd.h +++ /dev/null @@ -1,674 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Frank Zago <fzago@systemfabricworks.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/io.h> - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <linux/sysctl.h> -#include <linux/random.h> - -#include <net/sock.h> -#include <linux/in.h> - -#define DEBUG_SUBSYSTEM S_LND - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include <lnet/lib-lnet.h> - -/* CPU_{L,B}E #defines needed by Voltaire headers */ -#include <asm/byteorder.h> -#ifdef __BIG_ENDIAN__ -#define CPU_BE 1 -#define CPU_LE 0 -#endif -#ifdef __LITTLE_ENDIAN__ -#define CPU_BE 0 -#define CPU_LE 1 -#endif - -#include <vverbs.h> -#include <ib-cm.h> -#include <ibat.h> - -/* GCC 3.2.2, miscompiles this driver. - * See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9853. */ -#define GCC_VERSION ((__GNUC__*100 + __GNUC_MINOR__)*100 + __GNUC_PATCHLEVEL__) -#if (GCC_VERSION >= 30000) && (GCC_VERSION < 30203) -# error Invalid GCC version. 
Must use GCC < 3.0.0 || GCC >= 3.2.3 -#endif - -#ifdef CONFIG_SMP -# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define IBNAL_N_SCHED 1 /* # schedulers */ -#endif - -#define IBNAL_USE_FMR 1 - -/* tunables fixed at compile time */ -#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */ -#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */ -#define IBNAL_CREDIT_HIGHWATER 7 /* when eagerly to return credits */ -#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */ - -/* constants derived from sdp-connection.c */ -#define IBNAL_QKEY 0 -#define IBNAL_PKEY 0xffff -#define IBNAL_PKEY_IDX 0 -#define IBNAL_SGID_IDX 0 -#define IBNAL_SERVICE_LEVEL 0 -#define IBNAL_STATIC_RATE 0 -#define IBNAL_EE_FLOW_CNT 1 -#define IBNAL_LOCAL_SUB 1 -#define IBNAL_TRAFFIC_CLASS 0 -#define IBNAL_SOURCE_PATH_BIT 0 -#define IBNAL_OUS_DST_RD 1 -#define IBNAL_IB_MTU vv_mtu_1024 - -/* constants derived from sdp-hca-params.h */ -#define PATH_RATE_2_5GB 2 -#define MLX_IPD_1x 1 -#define MLX_IPD_4x 0 -#define IBNAL_R_2_STATIC_RATE(r) ((r) == PATH_RATE_2_5GB ? MLX_IPD_1x : MLX_IPD_4x) - -/* other low-level IB constants */ -#define IBNAL_PKT_LIFETIME 5 -#define IBNAL_ARB_INITIATOR_DEPTH 0 -#define IBNAL_ARB_RESP_RES 0 -#define IBNAL_FAILOVER_ACCEPTED 0 - -/************************/ -/* derived constants... 
*/ - -/* TX messages (shared by all connections) */ -#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx) -#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE) -#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE) - -#if IBNAL_USE_FMR -# define IBNAL_MAX_RDMA_FRAGS 1 -# define IBNAL_CONCURRENT_SENDS IBNAL_RX_MSGS -#else -# define IBNAL_MAX_RDMA_FRAGS LNET_MAX_IOV -# define IBNAL_CONCURRENT_SENDS IBNAL_MSG_QUEUE_SIZE -#endif - -/* RX messages (per connection) */ -#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE*2) -#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE) -#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) - -#define IBNAL_CQ_ENTRIES() (IBNAL_TX_MSGS() * (1 + IBNAL_MAX_RDMA_FRAGS) + \ - IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers) - -typedef struct -{ - unsigned int *kib_service_number; /* IB service number */ - int *kib_min_reconnect_interval; /* first failed connection retry... */ - int *kib_max_reconnect_interval; /* ...exponentially increasing to this */ - int *kib_concurrent_peers; /* max # nodes all talking to me */ - int *kib_cksum; /* checksum kib_msg_t? */ - int *kib_timeout; /* comms timeout (seconds) */ - int *kib_ntx; /* # tx descs */ - int *kib_credits; /* # concurrent sends */ - int *kib_peercredits; /* # concurrent sends to 1 peer */ - int *kib_arp_retries; /* # times to retry ARP */ - char **kib_hca_basename; /* HCA base name */ - char **kib_ipif_basename; /* IPoIB interface base name */ - int *kib_local_ack_timeout; /* IB RC QP ack timeout... */ - int *kib_retry_cnt; /* ...and retry */ - int *kib_rnr_cnt; /* RNR retries... 
*/ - int *kib_rnr_nak_timer; /* ...and interval */ - int *kib_keepalive; /* keepalive interval */ - int *kib_concurrent_sends; /* send work queue sizing */ -#if IBNAL_USE_FMR - int *kib_fmr_remaps; /* # FMR maps before unmap required */ -#endif -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *kib_sysctl; /* sysctl interface */ -#endif -} kib_tunables_t; - -typedef struct -{ - int ibp_npages; /* # pages */ - struct page *ibp_pages[0]; -} kib_pages_t; - -#if IBNAL_USE_FMR -typedef struct -{ - vv_fmr_h_t md_fmrhandle; /* FMR handle */ - int md_fmrcount; /* # mappings left */ - int md_active; /* mapping in use? */ - __u32 md_lkey; /* local key */ - __u32 md_rkey; /* remote key */ - __u64 md_addr; /* IO VM address */ -} kib_md_t; -#endif - -typedef struct -{ - int kib_init; /* initialisation state */ - __u64 kib_incarnation; /* which one am I */ - int kib_shutdown; /* shut down? */ - atomic_t kib_nthreads; /* # live threads */ - lnet_ni_t *kib_ni; /* _the_ nal instance */ - - vv_gid_t kib_port_gid; /* device/port GID */ - vv_p_key_t kib_port_pkey; /* device/port pkey */ - - cm_cep_handle_t kib_listen_handle; /* IB listen handle */ - - rwlock_t kib_global_lock; /* stabilize peer/conn ops */ - int kib_ready; /* CQ callback fired */ - int kib_checking_cq; /* a scheduler is checking the CQ */ - - struct list_head *kib_peers; /* hash table of all my known peers */ - int kib_peer_hash_size; /* size of kib_peers */ - atomic_t kib_npeers; /* # peers extant */ - atomic_t kib_nconns; /* # connections extant */ - - void *kib_connd; /* the connd task (serialisation assertions) */ - struct list_head kib_connd_peers; /* peers wanting to get connected */ - struct list_head kib_connd_pcreqs; /* passive connection requests */ - struct list_head kib_connd_conns; /* connections to setup/teardown */ - struct list_head kib_connd_zombies; /* connections with zero refcount */ - wait_queue_head_t kib_connd_waitq; /* connection daemon sleeps here */ - spinlock_t 
kib_connd_lock; /* serialise */ - - wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */ - spinlock_t kib_sched_lock; /* serialise */ - - struct kib_tx *kib_tx_descs; /* all the tx descriptors */ - kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ - - struct list_head kib_idle_txs; /* idle tx descriptors */ - __u64 kib_next_tx_cookie; /* RDMA completion cookie */ - spinlock_t kib_tx_lock; /* serialise */ - - vv_hca_h_t kib_hca; /* The HCA */ - vv_hca_attrib_t kib_hca_attrs; /* its properties */ - int kib_port; /* port on the device */ - vv_port_attrib_t kib_port_attr; /* its properties */ - - vv_pd_h_t kib_pd; /* protection domain */ - vv_cq_h_t kib_cq; /* completion queue */ - -} kib_data_t; - -#define IBNAL_INIT_NOTHING 0 -#define IBNAL_INIT_DATA 1 -#define IBNAL_INIT_LIB 2 -#define IBNAL_INIT_HCA 3 -#define IBNAL_INIT_ASYNC 4 -#define IBNAL_INIT_PD 5 -#define IBNAL_INIT_TXD 6 -#define IBNAL_INIT_CQ 7 -#define IBNAL_INIT_ALL 8 - -#include "viblnd_wire.h" - -/***********************************************************************/ - -typedef struct kib_rx /* receive message */ -{ - struct list_head rx_list; /* queue for attention */ - struct kib_conn *rx_conn; /* owning conn */ - int rx_nob; /* # bytes received (-1 while posted) */ - vv_l_key_t rx_lkey; /* local key */ - kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */ - vv_wr_t rx_wrq; /* receive work item */ - vv_scatgat_t rx_gl; /* and its memory */ -} kib_rx_t; - -typedef struct kib_tx /* transmit message */ -{ - struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. 
*/ - struct kib_conn *tx_conn; /* owning conn */ - int tx_sending; /* # tx callbacks outstanding */ - int tx_queued; /* queued for sending */ - int tx_waiting; /* waiting for peer */ - int tx_status; /* completion status */ - unsigned long tx_deadline; /* completion deadline */ - __u64 tx_cookie; /* completion cookie */ - lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */ - vv_l_key_t tx_lkey; /* local key for message buffer */ - kib_msg_t *tx_msg; /* message buffer (host vaddr) */ - int tx_nwrq; /* # send work items */ -#if IBNAL_USE_FMR - vv_wr_t tx_wrq[2]; /* send work items... */ - vv_scatgat_t tx_gl[2]; /* ...and their memory */ - kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */ - kib_md_t tx_md; /* FMR mapping descriptor */ - __u64 *tx_pages; /* page phys addrs */ -#else - vv_wr_t *tx_wrq; /* send work items... */ - vv_scatgat_t *tx_gl; /* ...and their memory */ - kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */ -#endif -} kib_tx_t; - -/* Passive connection request (listener callback) queued for handling by connd */ -typedef struct kib_pcreq -{ - struct list_head pcr_list; /* queue for handling by connd */ - cm_cep_handle_t pcr_cep; /* listening handle */ - cm_request_data_t pcr_cmreq; /* request data */ -} kib_pcreq_t; - -typedef struct kib_connvars -{ - /* connection-in-progress variables */ - __u32 cv_port; - __u32 cv_pkey_index; - __u32 cv_rnr_count; - __u32 cv_sgid_index; - __u32 cv_remote_qpn; - __u32 cv_local_qpn; - __u32 cv_rxpsn; - __u32 cv_txpsn; - ib_path_record_v2_t cv_path; - ibat_arp_data_t cv_arp; - ibat_stat_t cv_arprc; - cm_conn_data_t cv_conndata; -} kib_connvars_t; - -typedef struct kib_conn -{ - struct kib_peer *ibc_peer; /* owning peer */ - struct list_head ibc_list; /* stash on peer's conn list */ - __u64 ibc_incarnation; /* which instance of the peer */ - __u64 ibc_txseq; /* tx sequence number */ - __u64 ibc_rxseq; /* rx sequence number */ - __u32 ibc_version; /* peer protocol version */ - atomic_t 
ibc_refcount; /* # users */ - int ibc_state; /* what's happening */ - int ibc_nsends_posted; /* # uncompleted sends */ - int ibc_credits; /* # credits I have */ - int ibc_outstanding_credits; /* # credits to return */ - int ibc_reserved_credits; /* # credits for ACK/DONE msgs */ - int ibc_disconnect; /* some disconnect callback fired */ - int ibc_comms_error; /* set on comms error */ - unsigned long ibc_last_send; /* time of last send */ - struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */ - struct list_head ibc_tx_queue_nocred; /* sends that don't need a cred */ - struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */ - struct list_head ibc_tx_queue; /* send queue */ - struct list_head ibc_active_txs; /* active tx awaiting completion */ - spinlock_t ibc_lock; /* serialise */ - kib_rx_t *ibc_rxs; /* the rx descs */ - kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */ - vv_qp_h_t ibc_qp; /* queue pair */ - cm_cep_handle_t ibc_cep; /* connection endpoint */ - kib_connvars_t *ibc_connvars; /* in-progress connection state */ -} kib_conn_t; - -#define IBNAL_CONN_INIT_NOTHING 0 /* incomplete init */ -#define IBNAL_CONN_INIT_QP 1 /* QP allocated */ -#define IBNAL_CONN_INIT 2 /* completed init */ -#define IBNAL_CONN_ACTIVE_ARP 3 /* active arping */ -#define IBNAL_CONN_ACTIVE_CONNECT 4 /* active sending req */ -#define IBNAL_CONN_ACTIVE_CHECK_REPLY 5 /* active checking reply */ -#define IBNAL_CONN_ACTIVE_RTU 6 /* active sending rtu */ -#define IBNAL_CONN_PASSIVE_WAIT 7 /* passive waiting for rtu */ -#define IBNAL_CONN_ESTABLISHED 8 /* connection established */ -#define IBNAL_CONN_DISCONNECT1 9 /* disconnect phase 1 */ -#define IBNAL_CONN_DISCONNECT2 10 /* disconnect phase 2 */ -#define IBNAL_CONN_DISCONNECTED 11 /* disconnect complete */ - -typedef struct kib_peer -{ - struct list_head ibp_list; /* stash on global peer list */ - struct list_head ibp_connd_list; /* schedule on kib_connd_peers */ - lnet_nid_t ibp_nid; /* who's on 
the other end(s) */ - __u32 ibp_ip; /* IP to query for peer conn params */ - int ibp_port; /* port to qery for peer conn params */ - __u64 ibp_incarnation; /* peer's incarnation */ - atomic_t ibp_refcount; /* # users */ - int ibp_persistence; /* "known" peer refs */ - struct list_head ibp_conns; /* all active connections */ - struct list_head ibp_tx_queue; /* msgs waiting for a conn */ - int ibp_connecting; /* current active connection attempts */ - int ibp_accepting; /* current passive connection attempts */ - int ibp_arp_count; /* # arp attempts */ - unsigned long ibp_reconnect_time; /* when reconnect may be attempted */ - unsigned long ibp_reconnect_interval; /* exponential backoff */ - int ibp_error; /* errno on closing this peer */ - cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */ -} kib_peer_t; - - -extern kib_data_t kibnal_data; -extern kib_tunables_t kibnal_tunables; - -int kibnal_startup (lnet_ni_t *ni); -void kibnal_shutdown (lnet_ni_t *ni); -int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -extern int kibnal_eager_recv (lnet_ni_t *ni, void *private, - lnet_msg_t *lntmsg, void **new_private); -int kibnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -extern void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob); -extern void kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits, - lnet_nid_t dstnid, __u64 dststamp, __u64 seq); -extern int kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob); -extern int kibnal_create_peer(kib_peer_t **peerp, lnet_nid_t nid); -extern void kibnal_destroy_peer(kib_peer_t *peer); -extern int kibnal_add_persistent_peer (lnet_nid_t nid, __u32 ip); -extern int kibnal_del_peer(lnet_nid_t nid); -extern kib_peer_t *kibnal_find_peer_locked(lnet_nid_t nid); -extern 
void kibnal_unlink_peer_locked(kib_peer_t *peer); -extern void kibnal_peer_alive(kib_peer_t *peer); -extern int kibnal_close_stale_conns_locked(kib_peer_t *peer, - __u64 incarnation); -extern kib_conn_t *kibnal_create_conn(cm_cep_handle_t cep); -extern void kibnal_listen_callback(cm_cep_handle_t cep, cm_conn_data_t *info, void *arg); - -extern int kibnal_alloc_pages(kib_pages_t **pp, int npages, int access); -extern void kibnal_free_pages(kib_pages_t *p); - -extern void kibnal_check_sends(kib_conn_t *conn); -extern void kibnal_close_conn_locked(kib_conn_t *conn, int error); -extern void kibnal_destroy_conn(kib_conn_t *conn); -extern int kibnal_thread_start(int (*fn)(void *arg), void *arg); -extern int kibnal_scheduler(void *arg); -extern int kibnal_connd(void *arg); -extern void kibnal_init_tx_msg(kib_tx_t *tx, int type, int body_nob); -extern void kibnal_close_conn(kib_conn_t *conn, int why); -extern int kibnal_set_qp_state(kib_conn_t *conn, vv_qp_state_t new_state); -extern void kibnal_async_callback(vv_event_record_t ev); -extern void kibnal_cq_callback(unsigned long context); -extern void kibnal_passive_connreq(kib_pcreq_t *pcr, int reject); -extern void kibnal_txlist_done (struct list_head *txlist, int status); -extern void kibnal_queue_tx(kib_tx_t *tx, kib_conn_t *conn); -extern int kibnal_init_rdma(kib_tx_t *tx, int type, int nob, - kib_rdma_desc_t *dstrd, __u64 dstcookie); -extern int kibnal_tunables_init(void); -extern void kibnal_tunables_fini(void); - -#define kibnal_conn_addref(conn) \ -do { \ - CDEBUG(D_NET, "conn[%p] (%d)++\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - atomic_inc(&(conn)->ibc_refcount); \ -} while (0) - -#define kibnal_conn_decref(conn) \ -do { \ - unsigned long flags; \ - \ - CDEBUG(D_NET, "conn[%p] (%d)--\n", \ - (conn), atomic_read(&(conn)->ibc_refcount)); \ - LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \ - if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \ - 
spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); \ - list_add_tail(&(conn)->ibc_list, \ - &kibnal_data.kib_connd_zombies); \ - wake_up(&kibnal_data.kib_connd_waitq); \ - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); \ - } \ -} while (0) - -#define kibnal_peer_addref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - atomic_inc(&(peer)->ibp_refcount); \ -} while (0) - -#define kibnal_peer_decref(peer) \ -do { \ - CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \ - (peer), libcfs_nid2str((peer)->ibp_nid), \ - atomic_read (&(peer)->ibp_refcount)); \ - LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \ - if (atomic_dec_and_test(&(peer)->ibp_refcount)) \ - kibnal_destroy_peer(peer); \ -} while (0) - -static inline struct list_head * -kibnal_nid2peerlist (lnet_nid_t nid) -{ - unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - - return (&kibnal_data.kib_peers [hash]); -} - -static inline int -kibnal_peer_active (kib_peer_t *peer) -{ - /* Am I in the peer hash table? 
*/ - return (!list_empty(&peer->ibp_list)); -} - -static inline void -kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn) -{ - struct list_head *q; - - LASSERT (tx->tx_nwrq > 0); /* work items set up */ - LASSERT (!tx->tx_queued); /* not queued for sending already */ - - tx->tx_queued = 1; - tx->tx_deadline = jiffies + (*kibnal_tunables.kib_timeout * HZ); - - if (tx->tx_conn == NULL) { - kibnal_conn_addref(conn); - tx->tx_conn = conn; - LASSERT (tx->tx_msg->ibm_type != IBNAL_MSG_PUT_DONE); - } else { - LASSERT (tx->tx_conn == conn); - LASSERT (tx->tx_msg->ibm_type == IBNAL_MSG_PUT_DONE); - } - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* All messages have simple credit control */ - q = &conn->ibc_tx_queue; - } else { - LASSERT (conn->ibc_version == IBNAL_MSG_VERSION); - - switch (tx->tx_msg->ibm_type) { - case IBNAL_MSG_PUT_REQ: - case IBNAL_MSG_GET_REQ: - /* RDMA request: reserve a buffer for the RDMA reply - * before sending */ - q = &conn->ibc_tx_queue_rsrvd; - break; - - case IBNAL_MSG_PUT_NAK: - case IBNAL_MSG_PUT_ACK: - case IBNAL_MSG_PUT_DONE: - case IBNAL_MSG_GET_DONE: - /* RDMA reply/completion: no credits; peer has reserved - * a reply buffer */ - q = &conn->ibc_tx_queue_nocred; - break; - - case IBNAL_MSG_NOOP: - case IBNAL_MSG_IMMEDIATE: - /* Otherwise: consume a credit before sending */ - q = &conn->ibc_tx_queue; - break; - - default: - LBUG(); - q = NULL; - } - } - - list_add_tail(&tx->tx_list, q); -} - -static inline int -kibnal_send_keepalive(kib_conn_t *conn) -{ - return (*kibnal_tunables.kib_keepalive > 0) && - time_after(jiffies, conn->ibc_last_send + - *kibnal_tunables.kib_keepalive*HZ); -} - -#ifndef IBNAL_VOIDSTAR_SGADDR -# define IBNAL_VOIDSTAR_SGADDR 0 -#endif - -#if IBNAL_VOIDSTAR_SGADDR -# if CONFIG_HIGHMEM -# if CONFIG_X86 && CONFIG_HIGHMEM4G - /* truncation to void* doesn't matter if 0 <= physmem < 4G - * so allow x86 with 32 bit phys addrs */ -# elif CONFIG_IA64 - /* OK anyway on 64-bit arch */ -# else -# 
error "Can't support HIGHMEM when vv_scatgat_t::v_address is void *" -# endif -# endif -# define KIBNAL_ADDR2SG(a) ((void *)((unsigned long)(a))) -# define KIBNAL_SG2ADDR(a) ((__u64)((unsigned long)(a))) -static inline __u64 kibnal_addr2net (__u64 addr) -{ - void *netaddr; - vv_return_t vvrc = vv_va2advertise_addr(kibnal_data.kib_hca, - KIBNAL_ADDR2SG(addr), - &netaddr); - LASSERT (vvrc == vv_return_ok); - return KIBNAL_SG2ADDR(netaddr); -} -#else -# define KIBNAL_ADDR2SG(a) a -# define KIBNAL_SG2ADDR(a) a -static inline __u64 kibnal_addr2net (__u64 addr) -{ - __u64 netaddr; - vv_return_t vvrc = vv_va2advertise_addr(kibnal_data.kib_hca, - addr, - &netaddr); - LASSERT (vvrc == vv_return_ok); - return netaddr; -} -#endif - -/* CAVEAT EMPTOR: We rely on tx/rx descriptor alignment to allow us to use the - * lowest 2 bits of the work request id to stash the work item type (the op - * field is not valid when the wc completes in error). */ - -#define IBNAL_WID_TX 0 -#define IBNAL_WID_RX 1 -#define IBNAL_WID_RDMA 2 -#define IBNAL_WID_MASK 3UL - -static inline vv_wr_id_t -kibnal_ptr2wreqid (void *ptr, int type) -{ - unsigned long lptr = (unsigned long)ptr; - - LASSERT ((lptr & IBNAL_WID_MASK) == 0); - LASSERT ((type & ~IBNAL_WID_MASK) == 0); - return (vv_wr_id_t)(lptr | type); -} - -static inline void * -kibnal_wreqid2ptr (vv_wr_id_t wreqid) -{ - return (void *)(((unsigned long)wreqid) & ~IBNAL_WID_MASK); -} - -static inline int -kibnal_wreqid2type (vv_wr_id_t wreqid) -{ - return (wreqid & IBNAL_WID_MASK); -} - -static inline void -kibnal_set_conn_state (kib_conn_t *conn, int state) -{ - conn->ibc_state = state; - mb(); -} - -#if IBNAL_USE_FMR - -static inline int -kibnal_rd_size (kib_rdma_desc_t *rd) -{ - return rd->rd_nob; -} - -#else -static inline __u64 -kibnal_rf_addr (kib_rdma_frag_t *rf) -{ - return (((__u64)rf->rf_addr_hi)<<32) | ((__u64)rf->rf_addr_lo); -} - -static inline void -kibnal_rf_set (kib_rdma_frag_t *rf, __u64 addr, int nob) -{ - rf->rf_addr_lo = addr & 
0xffffffff; - rf->rf_addr_hi = (addr >> 32) & 0xffffffff; - rf->rf_nob = nob; -} - -static inline int -kibnal_rd_size (kib_rdma_desc_t *rd) -{ - int i; - int size; - - for (i = size = 0; i < rd->rd_nfrag; i++) - size += rd->rd_frags[i].rf_nob; - - return size; -} -#endif diff --git a/lnet/klnds/viblnd/viblnd_cb.c b/lnet/klnds/viblnd/viblnd_cb.c deleted file mode 100644 index 490a7e9b17507691e0c8762f5ccff5bbc5e54d77..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd_cb.c +++ /dev/null @@ -1,3674 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Frank Zago <fzago@systemfabricworks.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "viblnd.h" - -void -kibnal_tx_done (kib_tx_t *tx) -{ - lnet_msg_t *lntmsg[2]; - int rc = tx->tx_status; - int i; - - LASSERT (!in_interrupt()); - LASSERT (!tx->tx_queued); /* mustn't be queued for sending */ - LASSERT (tx->tx_sending == 0); /* mustn't be awaiting sent callback */ - LASSERT (!tx->tx_waiting); /* mustn't be awaiting peer response */ - -#if IBNAL_USE_FMR - if (tx->tx_md.md_fmrcount == 0 || - (rc != 0 && tx->tx_md.md_active)) { - vv_return_t vvrc; - - /* mapping must be active (it dropped fmrcount to 0) */ - LASSERT (tx->tx_md.md_active); - - vvrc = vv_unmap_fmr(kibnal_data.kib_hca, - 1, &tx->tx_md.md_fmrhandle); - LASSERT (vvrc == vv_return_ok); - - tx->tx_md.md_fmrcount = *kibnal_tunables.kib_fmr_remaps; - } - tx->tx_md.md_active = 0; -#endif - - /* tx may have up to 2 lnet msgs to finalise */ - lntmsg[0] = tx->tx_lntmsg[0]; tx->tx_lntmsg[0] = NULL; - lntmsg[1] = tx->tx_lntmsg[1]; tx->tx_lntmsg[1] = NULL; - - if (tx->tx_conn != NULL) { - kibnal_conn_decref(tx->tx_conn); - tx->tx_conn = NULL; - } - - tx->tx_nwrq = 0; - tx->tx_status = 0; - - spin_lock(&kibnal_data.kib_tx_lock); - - list_add (&tx->tx_list, &kibnal_data.kib_idle_txs); - - spin_unlock(&kibnal_data.kib_tx_lock); - - /* delay finalize until my descs have been freed */ - for (i = 0; i < 2; i++) { - if (lntmsg[i] == NULL) - continue; - - lnet_finalize (kibnal_data.kib_ni, lntmsg[i], rc); - } -} - -void -kibnal_txlist_done (struct list_head *txlist, int status) -{ - kib_tx_t *tx; - - while (!list_empty (txlist)) { - tx = list_entry (txlist->next, kib_tx_t, tx_list); - - list_del (&tx->tx_list); - /* complete now */ - tx->tx_waiting = 0; - tx->tx_status = status; - kibnal_tx_done (tx); - } -} - -kib_tx_t * -kibnal_get_idle_tx (void) -{ - kib_tx_t *tx; - - spin_lock(&kibnal_data.kib_tx_lock); - - if (list_empty (&kibnal_data.kib_idle_txs)) { - spin_unlock(&kibnal_data.kib_tx_lock); - return NULL; - } - - tx = list_entry (kibnal_data.kib_idle_txs.next, kib_tx_t, tx_list); - 
list_del (&tx->tx_list); - - /* Allocate a new completion cookie. It might not be needed, - * but we've got a lock right now and we're unlikely to - * wrap... */ - tx->tx_cookie = kibnal_data.kib_next_tx_cookie++; - - spin_unlock(&kibnal_data.kib_tx_lock); - - LASSERT (tx->tx_nwrq == 0); - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending == 0); - LASSERT (!tx->tx_waiting); - LASSERT (tx->tx_status == 0); - LASSERT (tx->tx_conn == NULL); - LASSERT (tx->tx_lntmsg[0] == NULL); - LASSERT (tx->tx_lntmsg[1] == NULL); - - return tx; -} - -int -kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit) -{ - kib_conn_t *conn = rx->rx_conn; - int rc = 0; - __u64 addr = (__u64)((unsigned long)((rx)->rx_msg)); - vv_return_t vvrc; - - LASSERT (!in_interrupt()); - /* old peers don't reserve rxs for RDMA replies */ - LASSERT (!rsrvd_credit || - conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - - rx->rx_gl = (vv_scatgat_t) { - .v_address = KIBNAL_ADDR2SG(addr), - .l_key = rx->rx_lkey, - .length = IBNAL_MSG_SIZE, - }; - - rx->rx_wrq = (vv_wr_t) { - .wr_id = kibnal_ptr2wreqid(rx, IBNAL_WID_RX), - .completion_notification = 1, - .scatgat_list = &rx->rx_gl, - .num_of_data_segments = 1, - .wr_type = vv_wr_receive, - }; - - LASSERT (conn->ibc_state >= IBNAL_CONN_INIT); - LASSERT (rx->rx_nob >= 0); /* not posted */ - - CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n", - rx->rx_wrq.scatgat_list->length, - rx->rx_wrq.scatgat_list->l_key, - KIBNAL_SG2ADDR(rx->rx_wrq.scatgat_list->v_address)); - - if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) { - /* No more posts for this rx; so lose its ref */ - kibnal_conn_decref(conn); - return 0; - } - - rx->rx_nob = -1; /* flag posted */ - - spin_lock(&conn->ibc_lock); - /* Serialise vv_post_receive; it's not re-entrant on the same QP */ - vvrc = vv_post_receive(kibnal_data.kib_hca, - conn->ibc_qp, &rx->rx_wrq); - - if (vvrc == vv_return_ok) { - if (credit) - conn->ibc_outstanding_credits++; - if (rsrvd_credit) - conn->ibc_reserved_credits++; 
- - spin_unlock(&conn->ibc_lock); - - if (credit || rsrvd_credit) - kibnal_check_sends(conn); - - return 0; - } - - spin_unlock(&conn->ibc_lock); - - CERROR ("post rx -> %s failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc); - rc = -EIO; - kibnal_close_conn(rx->rx_conn, rc); - /* No more posts for this rx; so lose its ref */ - kibnal_conn_decref(conn); - return rc; -} - -int -kibnal_post_receives (kib_conn_t *conn) -{ - int i; - int rc; - - LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED); - LASSERT (conn->ibc_comms_error == 0); - - for (i = 0; i < IBNAL_RX_MSGS; i++) { - /* +1 ref for rx desc. This ref remains until kibnal_post_rx - * fails (i.e. actual failure or we're disconnecting) */ - kibnal_conn_addref(conn); - rc = kibnal_post_rx (&conn->ibc_rxs[i], 0, 0); - if (rc != 0) - return rc; - } - - return 0; -} - -kib_tx_t * -kibnal_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie) -{ - struct list_head *tmp; - - list_for_each(tmp, &conn->ibc_active_txs) { - kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list); - - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_sending != 0 || tx->tx_waiting); - - if (tx->tx_cookie != cookie) - continue; - - if (tx->tx_waiting && - tx->tx_msg->ibm_type == txtype) - return tx; - - CWARN("Bad completion: %swaiting, type %x (wanted %x)\n", - tx->tx_waiting ? "" : "NOT ", - tx->tx_msg->ibm_type, txtype); - } - return NULL; -} - -void -kibnal_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie) -{ - kib_tx_t *tx; - int idle; - - spin_lock(&conn->ibc_lock); - - tx = kibnal_find_waiting_tx_locked(conn, txtype, cookie); - if (tx == NULL) { - spin_unlock(&conn->ibc_lock); - - CWARN("Unmatched completion type %x cookie "LPX64" from %s\n", - txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_close_conn (conn, -EPROTO); - return; - } - - if (tx->tx_status == 0) { /* success so far */ - if (status < 0) { /* failed? 
*/ - tx->tx_status = status; - } else if (txtype == IBNAL_MSG_GET_REQ) { - lnet_set_reply_msg_len(kibnal_data.kib_ni, - tx->tx_lntmsg[1], status); - } - } - - tx->tx_waiting = 0; - - idle = !tx->tx_queued && (tx->tx_sending == 0); - if (idle) - list_del(&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (idle) - kibnal_tx_done(tx); -} - -void -kibnal_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie) -{ - kib_tx_t *tx = kibnal_get_idle_tx(); - - if (tx == NULL) { - CERROR("Can't get tx for completion %x for %s\n", - type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - tx->tx_msg->ibm_u.completion.ibcm_status = status; - tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie; - kibnal_init_tx_msg(tx, type, sizeof(kib_completion_msg_t)); - - kibnal_queue_tx(tx, conn); -} - -void -kibnal_handle_rx (kib_rx_t *rx) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - int credits = msg->ibm_credits; - kib_tx_t *tx; - int rc = 0; - int repost = 1; - int rsrvd_credit = 0; - int rc2; - - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - CDEBUG (D_NET, "Received %x[%d] from %s\n", - msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - if (credits != 0) { - /* Have I received credits that will let me send? 
*/ - spin_lock(&conn->ibc_lock); - conn->ibc_credits += credits; - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); - } - - switch (msg->ibm_type) { - default: - CERROR("Bad IBNAL message type %x from %s\n", - msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - - case IBNAL_MSG_NOOP: - break; - - case IBNAL_MSG_IMMEDIATE: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr, - msg->ibm_srcnid, rx, 0); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_PUT_REQ: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.putreq.ibprm_hdr, - msg->ibm_srcnid, rx, 1); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_PUT_NAK: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid)); - kibnal_handle_completion(conn, IBNAL_MSG_PUT_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBNAL_MSG_PUT_ACK: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - spin_lock(&conn->ibc_lock); - tx = kibnal_find_waiting_tx_locked(conn, IBNAL_MSG_PUT_REQ, - msg->ibm_u.putack.ibpam_src_cookie); - if (tx != NULL) - list_del(&tx->tx_list); - spin_unlock(&conn->ibc_lock); - - if (tx == NULL) { - CERROR("Unmatched PUT_ACK from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - rc = -EPROTO; - break; - } - - LASSERT (tx->tx_waiting); - /* CAVEAT EMPTOR: I could be racing with tx_complete, but... - * (a) I can overwrite tx_msg since my peer has received it! - * (b) tx_waiting set tells tx_complete() it's not done. 
*/ - - tx->tx_nwrq = 0; /* overwrite PUT_REQ */ - - rc2 = kibnal_init_rdma(tx, IBNAL_MSG_PUT_DONE, - kibnal_rd_size(&msg->ibm_u.putack.ibpam_rd), - &msg->ibm_u.putack.ibpam_rd, - msg->ibm_u.putack.ibpam_dst_cookie); - if (rc2 < 0) - CERROR("Can't setup rdma for PUT to %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2); - - spin_lock(&conn->ibc_lock); - if (tx->tx_status == 0 && rc2 < 0) - tx->tx_status = rc2; - tx->tx_waiting = 0; /* clear waiting and queue atomically */ - kibnal_queue_tx_locked(tx, conn); - spin_unlock(&conn->ibc_lock); - break; - - case IBNAL_MSG_PUT_DONE: - /* This buffer was pre-reserved by not returning the credit - * when the PUT_REQ's buffer was reposted, so I just return it - * now */ - kibnal_handle_completion(conn, IBNAL_MSG_PUT_ACK, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - - case IBNAL_MSG_GET_REQ: - rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.get.ibgm_hdr, - msg->ibm_srcnid, rx, 1); - repost = rc < 0; /* repost on error */ - break; - - case IBNAL_MSG_GET_DONE: - rsrvd_credit = 1; /* rdma reply (was pre-reserved) */ - - kibnal_handle_completion(conn, IBNAL_MSG_GET_REQ, - msg->ibm_u.completion.ibcm_status, - msg->ibm_u.completion.ibcm_cookie); - break; - } - - if (rc < 0) /* protocol error */ - kibnal_close_conn(conn, rc); - - if (repost) { - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) - rsrvd_credit = 0; /* peer isn't pre-reserving */ - - kibnal_post_rx(rx, !rsrvd_credit, rsrvd_credit); - } -} - -void -kibnal_rx_complete (kib_rx_t *rx, vv_comp_status_t vvrc, int nob, __u64 rxseq) -{ - kib_msg_t *msg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - unsigned long flags; - int rc; - - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - LASSERT (rx->rx_nob < 0); /* was posted */ - rx->rx_nob = 0; /* isn't now */ - - if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) - goto ignore; - - if (vvrc != vv_comp_status_success) { - CERROR("Rx from %s failed: %d\n", - 
libcfs_nid2str(conn->ibc_peer->ibp_nid), vvrc); - goto failed; - } - - rc = kibnal_unpack_msg(msg, conn->ibc_version, nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - rx->rx_nob = nob; /* Can trust 'nob' now */ - - if (!lnet_ptlcompat_matchnid(conn->ibc_peer->ibp_nid, - msg->ibm_srcnid) || - !lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg->ibm_dstnid) || - msg->ibm_srcstamp != conn->ibc_incarnation || - msg->ibm_dststamp != kibnal_data.kib_incarnation) { - CERROR ("Stale rx from %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - goto failed; - } - - if (msg->ibm_seq != rxseq) { - CERROR ("Out-of-sequence rx from %s" - ": got "LPD64" but expected "LPD64"\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - msg->ibm_seq, rxseq); - goto failed; - } - - /* set time last known alive */ - kibnal_peer_alive(conn->ibc_peer); - - /* racing with connection establishment/teardown! */ - - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - /* must check holding global lock to eliminate race */ - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - list_add_tail(&rx->rx_list, &conn->ibc_early_rxs); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - return; - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - } - kibnal_handle_rx(rx); - return; - - failed: - CDEBUG(D_NET, "rx %p conn %p\n", rx, conn); - kibnal_close_conn(conn, -EIO); - ignore: - /* Don't re-post rx & drop its ref on conn */ - kibnal_conn_decref(conn); -} - -struct page * -kibnal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) { - page = vmalloc_to_page ((void *)vaddr); - LASSERT (page != NULL); - return page; - } -#if CONFIG_HIGHMEM - if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) { - /* No highmem pages only used for bulk (kiov) 
I/O */ - CERROR("find page for address in highmem\n"); - LBUG(); - } -#endif - page = virt_to_page (vaddr); - LASSERT (page != NULL); - return page; -} - -#if !IBNAL_USE_FMR -int -kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page, - unsigned long page_offset, unsigned long len) -{ - kib_rdma_frag_t *frag = &rd->rd_frags[rd->rd_nfrag]; - vv_l_key_t l_key; - vv_r_key_t r_key; - __u64 addr; - __u64 frag_addr; - vv_mem_reg_h_t mem_h; - vv_return_t vvrc; - - if (rd->rd_nfrag >= IBNAL_MAX_RDMA_FRAGS) { - CERROR ("Too many RDMA fragments\n"); - return -EMSGSIZE; - } - - /* Try to create an address that adaptor-tavor will munge into a valid - * network address, given how it maps all phys mem into 1 region */ - addr = lnet_page2phys(page) + page_offset + PAGE_OFFSET; - - /* NB this relies entirely on there being a single region for the whole - * of memory, since "high" memory will wrap in the (void *) cast! */ - vvrc = vv_get_gen_mr_attrib(kibnal_data.kib_hca, - (void *)((unsigned long)addr), - len, &mem_h, &l_key, &r_key); - LASSERT (vvrc == vv_return_ok); - - if (active) { - if (rd->rd_nfrag == 0) { - rd->rd_key = l_key; - } else if (l_key != rd->rd_key) { - CERROR ("> 1 key for single RDMA desc\n"); - return -EINVAL; - } - frag_addr = addr; - } else { - if (rd->rd_nfrag == 0) { - rd->rd_key = r_key; - } else if (r_key != rd->rd_key) { - CERROR ("> 1 key for single RDMA desc\n"); - return -EINVAL; - } - - frag_addr = kibnal_addr2net(addr); - } - - kibnal_rf_set(frag, frag_addr, len); - - CDEBUG(D_NET,"map frag [%d][%d %x %08x%08x] "LPX64"\n", - rd->rd_nfrag, frag->rf_nob, rd->rd_key, - frag->rf_addr_hi, frag->rf_addr_lo, frag_addr); - - rd->rd_nfrag++; - return 0; -} - -int -kibnal_setup_rd_iov(kib_tx_t *tx, kib_rdma_desc_t *rd, - vv_access_con_bit_mask_t access, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - /* active if I'm sending */ - int active = ((access & vv_acc_r_mem_write) == 0); - int fragnob; - int rc; - unsigned long 
vaddr; - struct page *page; - int page_offset; - - LASSERT (nob > 0); - LASSERT (niov > 0); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - rd->rd_nfrag = 0; - do { - LASSERT (niov > 0); - - vaddr = ((unsigned long)iov->iov_base) + offset; - page_offset = vaddr & (PAGE_SIZE - 1); - page = kibnal_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR ("Can't find page\n"); - return -EFAULT; - } - - fragnob = min((int)(iov->iov_len - offset), nob); - fragnob = min(fragnob, (int)PAGE_SIZE - page_offset); - - rc = kibnal_append_rdfrag(rd, active, page, - page_offset, fragnob); - if (rc != 0) - return rc; - - if (offset + fragnob < iov->iov_len) { - offset += fragnob; - } else { - offset = 0; - iov++; - niov--; - } - nob -= fragnob; - } while (nob > 0); - - return 0; -} - -int -kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, - vv_access_con_bit_mask_t access, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - /* active if I'm sending */ - int active = ((access & vv_acc_r_mem_write) == 0); - int fragnob; - int rc; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - rd->rd_nfrag = 0; - do { - LASSERT (nkiov > 0); - fragnob = min((int)(kiov->kiov_len - offset), nob); - - rc = kibnal_append_rdfrag(rd, active, kiov->kiov_page, - kiov->kiov_offset + offset, - fragnob); - if (rc != 0) - return rc; - - offset = 0; - kiov++; - nkiov--; - nob -= fragnob; - } while (nob > 0); - - return 0; -} -#else -int -kibnal_map_tx (kib_tx_t *tx, kib_rdma_desc_t *rd, int active, - int npages, unsigned long page_offset, int nob) -{ - vv_return_t vvrc; - vv_fmr_map_t map_props; - - LASSERT ((rd != tx->tx_rd) == !active); - LASSERT (!tx->tx_md.md_active); - 
LASSERT (tx->tx_md.md_fmrcount > 0); - LASSERT (page_offset < PAGE_SIZE); - LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT))); - LASSERT (npages <= LNET_MAX_IOV); - - memset(&map_props, 0, sizeof(map_props)); - - map_props.start = (void *)page_offset; - map_props.size = nob; - map_props.page_array_len = npages; - map_props.page_array = tx->tx_pages; - - vvrc = vv_map_fmr(kibnal_data.kib_hca, tx->tx_md.md_fmrhandle, - &map_props, &tx->tx_md.md_lkey, &tx->tx_md.md_rkey); - if (vvrc != vv_return_ok) { - CERROR ("Can't map vaddr %p for %d in %d pages: %d\n", - map_props.start, nob, npages, vvrc); - return -EFAULT; - } - - tx->tx_md.md_addr = (unsigned long)map_props.start; - tx->tx_md.md_active = 1; - tx->tx_md.md_fmrcount--; - - rd->rd_key = active ? tx->tx_md.md_lkey : tx->tx_md.md_rkey; - rd->rd_nob = nob; - rd->rd_addr = tx->tx_md.md_addr; - - /* Compensate for adaptor-tavor's munging of gatherlist addresses */ - if (active) - rd->rd_addr += PAGE_OFFSET; - - return 0; -} - -int -kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd, - vv_access_con_bit_mask_t access, - unsigned int niov, struct iovec *iov, int offset, int nob) - -{ - /* active if I'm sending */ - int active = ((access & vv_acc_r_mem_write) == 0); - int resid; - int fragnob; - struct page *page; - int npages; - unsigned long page_offset; - unsigned long vaddr; - - LASSERT (nob > 0); - LASSERT (niov > 0); - - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - niov--; - iov++; - LASSERT (niov > 0); - } - - if (nob > iov->iov_len - offset) { - CERROR ("Can't map multiple vaddr fragments\n"); - return (-EMSGSIZE); - } - - vaddr = ((unsigned long)iov->iov_base) + offset; - - page_offset = vaddr & (PAGE_SIZE - 1); - resid = nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - - page = kibnal_kvaddr_to_page(vaddr); - if (page == NULL) { - CERROR("Can't find page for %lu\n", vaddr); - return -EFAULT; - } - - tx->tx_pages[npages++] = lnet_page2phys(page); - - fragnob = 
PAGE_SIZE - (vaddr & (PAGE_SIZE - 1)); - vaddr += fragnob; - resid -= fragnob; - - } while (resid > 0); - - return kibnal_map_tx(tx, rd, active, npages, page_offset, nob); -} - -int -kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, - vv_access_con_bit_mask_t access, - int nkiov, lnet_kiov_t *kiov, int offset, int nob) -{ - /* active if I'm sending */ - int active = ((access & vv_acc_r_mem_write) == 0); - int resid; - int npages; - unsigned long page_offset; - - CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob); - - LASSERT (nob > 0); - LASSERT (nkiov > 0); - LASSERT (nkiov <= LNET_MAX_IOV); - LASSERT (!tx->tx_md.md_active); - LASSERT ((rd != tx->tx_rd) == !active); - - while (offset >= kiov->kiov_len) { - offset -= kiov->kiov_len; - nkiov--; - kiov++; - LASSERT (nkiov > 0); - } - - page_offset = kiov->kiov_offset + offset; - - resid = offset + nob; - npages = 0; - - do { - LASSERT (npages < LNET_MAX_IOV); - LASSERT (nkiov > 0); - - if ((npages > 0 && kiov->kiov_offset != 0) || - (resid > kiov->kiov_len && - (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) { - /* Can't have gaps */ - CERROR ("Can't make payload contiguous in I/O VM:" - "page %d, offset %d, len %d \n", - npages, kiov->kiov_offset, kiov->kiov_len); - - return -EINVAL; - } - - tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page); - resid -= kiov->kiov_len; - kiov++; - nkiov--; - } while (resid > 0); - - return kibnal_map_tx(tx, rd, active, npages, page_offset, nob); -} -#endif - -kib_conn_t * -kibnal_find_conn_locked (kib_peer_t *peer) -{ - struct list_head *tmp; - - /* just return the first connection */ - list_for_each (tmp, &peer->ibp_conns) { - return (list_entry(tmp, kib_conn_t, ibc_list)); - } - - return (NULL); -} - -void -kibnal_check_sends (kib_conn_t *conn) -{ - kib_tx_t *tx; - vv_return_t vvrc; - int rc; - int consume_cred; - int done; - - /* Don't send anything until after the connection is established */ - if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) { - 
CDEBUG(D_NET, "%s too soon\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return; - } - - spin_lock(&conn->ibc_lock); - - LASSERT (conn->ibc_nsends_posted <= - *kibnal_tunables.kib_concurrent_sends); - LASSERT (conn->ibc_reserved_credits >= 0); - - while (conn->ibc_reserved_credits > 0 && - !list_empty(&conn->ibc_tx_queue_rsrvd)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry(conn->ibc_tx_queue_rsrvd.next, - kib_tx_t, tx_list); - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &conn->ibc_tx_queue); - conn->ibc_reserved_credits--; - } - - if (list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_nocred) && - (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER || - kibnal_send_keepalive(conn))) { - spin_unlock(&conn->ibc_lock); - - tx = kibnal_get_idle_tx(); - if (tx != NULL) - kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0); - - spin_lock(&conn->ibc_lock); - - if (tx != NULL) - kibnal_queue_tx_locked(tx, conn); - } - - for (;;) { - if (!list_empty(&conn->ibc_tx_queue_nocred)) { - LASSERT (conn->ibc_version != - IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD); - tx = list_entry (conn->ibc_tx_queue_nocred.next, - kib_tx_t, tx_list); - consume_cred = 0; - } else if (!list_empty (&conn->ibc_tx_queue)) { - tx = list_entry (conn->ibc_tx_queue.next, - kib_tx_t, tx_list); - consume_cred = 1; - } else { - /* nothing waiting */ - break; - } - - LASSERT (tx->tx_queued); - /* We rely on this for QP sizing */ - LASSERT (tx->tx_nwrq > 0 && tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS); - - LASSERT (conn->ibc_outstanding_credits >= 0); - LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE); - LASSERT (conn->ibc_credits >= 0); - LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE); - - if (conn->ibc_nsends_posted == - *kibnal_tunables.kib_concurrent_sends) { - /* We've got some tx completions outstanding... 
*/ - CDEBUG(D_NET, "%s: posted enough\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (consume_cred) { - if (conn->ibc_credits == 0) { /* no credits */ - CDEBUG(D_NET, "%s: no credits\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - - if (conn->ibc_credits == 1 && /* last credit reserved for */ - conn->ibc_outstanding_credits == 0) { /* giving back credits */ - CDEBUG(D_NET, "%s: not using last credit\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - break; - } - } - - list_del (&tx->tx_list); - tx->tx_queued = 0; - - /* NB don't drop ibc_lock before bumping tx_sending */ - - if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP && - (!list_empty(&conn->ibc_tx_queue) || - !list_empty(&conn->ibc_tx_queue_nocred) || - (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER && - !kibnal_send_keepalive(conn)))) { - /* redundant NOOP */ - spin_unlock(&conn->ibc_lock); - kibnal_tx_done(tx); - spin_lock(&conn->ibc_lock); - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - continue; - } - - kibnal_pack_msg(tx->tx_msg, conn->ibc_version, - conn->ibc_outstanding_credits, - conn->ibc_peer->ibp_nid, conn->ibc_incarnation, - conn->ibc_txseq); - - conn->ibc_txseq++; - conn->ibc_outstanding_credits = 0; - conn->ibc_nsends_posted++; - if (consume_cred) - conn->ibc_credits--; - - /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA - * PUT. If so, it was first queued here as a PUT_REQ, sent and - * stashed on ibc_active_txs, matched by an incoming PUT_ACK, - * and then re-queued here. It's (just) possible that - * tx_sending is non-zero if we've not done the tx_complete() from - * the first send; hence the ++ rather than = below. */ - tx->tx_sending++; - - list_add (&tx->tx_list, &conn->ibc_active_txs); - - /* Keep holding ibc_lock while posting sends on this - * connection; vv_post_send() isn't re-entrant on the same - * QP!! 
*/ - - LASSERT (tx->tx_nwrq > 0); -#if 0 - if (tx->tx_wrq[0].wr_type == vv_wr_rdma_write) - CDEBUG(D_NET, "WORK[0]: RDMA gl %p for %d k %x -> "LPX64" k %x\n", - tx->tx_wrq[0].scatgat_list->v_address, - tx->tx_wrq[0].scatgat_list->length, - tx->tx_wrq[0].scatgat_list->l_key, - tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_addr, - tx->tx_wrq[0].type.send.send_qp_type.rc_type.r_r_key); - else - CDEBUG(D_NET, "WORK[0]: %s gl %p for %d k %x\n", - tx->tx_wrq[0].wr_type == vv_wr_send ? "SEND" : "????", - tx->tx_wrq[0].scatgat_list->v_address, - tx->tx_wrq[0].scatgat_list->length, - tx->tx_wrq[0].scatgat_list->l_key); - - if (tx->tx_nwrq > 1) { - if (tx->tx_wrq[1].wr_type == vv_wr_rdma_write) - CDEBUG(D_NET, "WORK[1]: RDMA gl %p for %d k %x -> "LPX64" k %x\n", - tx->tx_wrq[1].scatgat_list->v_address, - tx->tx_wrq[1].scatgat_list->length, - tx->tx_wrq[1].scatgat_list->l_key, - tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_addr, - tx->tx_wrq[1].type.send.send_qp_type.rc_type.r_r_key); - else - CDEBUG(D_NET, "WORK[1]: %s gl %p for %d k %x\n", - tx->tx_wrq[1].wr_type == vv_wr_send ? "SEND" : "????", - tx->tx_wrq[1].scatgat_list->v_address, - tx->tx_wrq[1].scatgat_list->length, - tx->tx_wrq[1].scatgat_list->l_key); - } -#endif - rc = -ECONNABORTED; - vvrc = vv_return_ok; - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) { - tx->tx_status = 0; - vvrc = vv_post_send_list(kibnal_data.kib_hca, - conn->ibc_qp, - tx->tx_nwrq, - tx->tx_wrq, - vv_operation_type_send_rc); - rc = (vvrc == vv_return_ok) ? 
0 : -EIO; - } - - conn->ibc_last_send = jiffies; - - if (rc != 0) { - /* NB credits are transferred in the actual - * message, which can only be the last work item */ - conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits; - if (consume_cred) - conn->ibc_credits++; - conn->ibc_nsends_posted--; - - tx->tx_status = rc; - tx->tx_waiting = 0; - tx->tx_sending--; - - done = (tx->tx_sending == 0); - if (done) - list_del (&tx->tx_list); - - spin_unlock(&conn->ibc_lock); - - if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) - CERROR ("Error %d posting transmit to %s\n", - vvrc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - else - CDEBUG (D_NET, "Error %d posting transmit to %s\n", - rc, libcfs_nid2str(conn->ibc_peer->ibp_nid)); - - kibnal_close_conn (conn, rc); - - if (done) - kibnal_tx_done (tx); - return; - } - } - - spin_unlock(&conn->ibc_lock); -} - -void -kibnal_tx_complete (kib_tx_t *tx, vv_comp_status_t vvrc) -{ - kib_conn_t *conn = tx->tx_conn; - int failed = (vvrc != vv_comp_status_success); - int idle; - - CDEBUG(D_NET, "tx %p conn %p sending %d nwrq %d vvrc %d\n", - tx, conn, tx->tx_sending, tx->tx_nwrq, vvrc); - - LASSERT (tx->tx_sending > 0); - - if (failed && - tx->tx_status == 0 && - conn->ibc_state == IBNAL_CONN_ESTABLISHED) - CDEBUG(D_NETERROR, "tx -> %s type %x cookie "LPX64 - "sending %d waiting %d: failed %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - tx->tx_msg->ibm_type, tx->tx_cookie, - tx->tx_sending, tx->tx_waiting, vvrc); - - spin_lock(&conn->ibc_lock); - - /* I could be racing with rdma completion. Whoever makes 'tx' idle - * gets to free it, which also drops its ref on 'conn'. 
*/ - - tx->tx_sending--; - conn->ibc_nsends_posted--; - - if (failed) { - tx->tx_waiting = 0; - tx->tx_status = -EIO; - } - - idle = (tx->tx_sending == 0) && /* This is the final callback */ - !tx->tx_waiting && /* Not waiting for peer */ - !tx->tx_queued; /* Not re-queued (PUT_DONE) */ - if (idle) - list_del(&tx->tx_list); - - kibnal_conn_addref(conn); /* 1 ref for me.... */ - - spin_unlock(&conn->ibc_lock); - - if (idle) - kibnal_tx_done (tx); - - if (failed) { - kibnal_close_conn (conn, -EIO); - } else { - kibnal_peer_alive(conn->ibc_peer); - kibnal_check_sends(conn); - } - - kibnal_conn_decref(conn); /* ...until here */ -} - -void -kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob) -{ - vv_scatgat_t *gl = &tx->tx_gl[tx->tx_nwrq]; - vv_wr_t *wrq = &tx->tx_wrq[tx->tx_nwrq]; - int nob = offsetof (kib_msg_t, ibm_u) + body_nob; - __u64 addr = (__u64)((unsigned long)((tx)->tx_msg)); - - LASSERT (tx->tx_nwrq >= 0 && - tx->tx_nwrq < (1 + IBNAL_MAX_RDMA_FRAGS)); - LASSERT (nob <= IBNAL_MSG_SIZE); - - kibnal_init_msg(tx->tx_msg, type, body_nob); - - *gl = (vv_scatgat_t) { - .v_address = KIBNAL_ADDR2SG(addr), - .l_key = tx->tx_lkey, - .length = nob, - }; - - memset(wrq, 0, sizeof(*wrq)); - - wrq->wr_id = kibnal_ptr2wreqid(tx, IBNAL_WID_TX); - wrq->wr_type = vv_wr_send; - wrq->scatgat_list = gl; - wrq->num_of_data_segments = 1; - wrq->completion_notification = 1; - wrq->type.send.solicited_event = 1; - wrq->type.send.immidiate_data_indicator = 0; - wrq->type.send.send_qp_type.rc_type.fance_indicator = 0; - - tx->tx_nwrq++; -} - -int -kibnal_init_rdma (kib_tx_t *tx, int type, int nob, - kib_rdma_desc_t *dstrd, __u64 dstcookie) -{ - kib_msg_t *ibmsg = tx->tx_msg; - kib_rdma_desc_t *srcrd = tx->tx_rd; - vv_scatgat_t *gl; - vv_wr_t *wrq; - int rc; - -#if IBNAL_USE_FMR - LASSERT (tx->tx_nwrq == 0); - - gl = &tx->tx_gl[0]; - gl->length = nob; - gl->v_address = KIBNAL_ADDR2SG(srcrd->rd_addr); - gl->l_key = srcrd->rd_key; - - wrq = &tx->tx_wrq[0]; - - wrq->wr_id = 
kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); - wrq->completion_notification = 0; - wrq->scatgat_list = gl; - wrq->num_of_data_segments = 1; - wrq->wr_type = vv_wr_rdma_write; - wrq->type.send.solicited_event = 0; - wrq->type.send.send_qp_type.rc_type.fance_indicator = 0; - wrq->type.send.send_qp_type.rc_type.r_addr = dstrd->rd_addr; - wrq->type.send.send_qp_type.rc_type.r_r_key = dstrd->rd_key; - - tx->tx_nwrq = 1; - rc = nob; -#else - /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */ - int resid = nob; - kib_rdma_frag_t *srcfrag; - int srcidx; - kib_rdma_frag_t *dstfrag; - int dstidx; - int wrknob; - - /* Called by scheduler */ - LASSERT (!in_interrupt()); - - LASSERT (type == IBNAL_MSG_GET_DONE || - type == IBNAL_MSG_PUT_DONE); - - srcidx = dstidx = 0; - srcfrag = &srcrd->rd_frags[0]; - dstfrag = &dstrd->rd_frags[0]; - rc = resid; - - while (resid > 0) { - if (srcidx >= srcrd->rd_nfrag) { - CERROR("Src buffer exhausted: %d frags\n", srcidx); - rc = -EPROTO; - break; - } - - if (dstidx == dstrd->rd_nfrag) { - CERROR("Dst buffer exhausted: %d frags\n", dstidx); - rc = -EPROTO; - break; - } - - if (tx->tx_nwrq == IBNAL_MAX_RDMA_FRAGS) { - CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n", - srcidx, srcrd->rd_nfrag, - dstidx, dstrd->rd_nfrag); - rc = -EMSGSIZE; - break; - } - - wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid); - - gl = &tx->tx_gl[tx->tx_nwrq]; - gl->v_address = KIBNAL_ADDR2SG(kibnal_rf_addr(srcfrag)); - gl->length = wrknob; - gl->l_key = srcrd->rd_key; - - wrq = &tx->tx_wrq[tx->tx_nwrq]; - - wrq->wr_id = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA); - wrq->completion_notification = 0; - wrq->scatgat_list = gl; - wrq->num_of_data_segments = 1; - wrq->wr_type = vv_wr_rdma_write; - wrq->type.send.solicited_event = 0; - wrq->type.send.send_qp_type.rc_type.fance_indicator = 0; - wrq->type.send.send_qp_type.rc_type.r_addr = kibnal_rf_addr(dstfrag); - wrq->type.send.send_qp_type.rc_type.r_r_key = dstrd->rd_key; - - resid -= wrknob; - if (wrknob 
< srcfrag->rf_nob) { - kibnal_rf_set(srcfrag, - kibnal_rf_addr(srcfrag) + wrknob, - srcfrag->rf_nob - wrknob); - } else { - srcfrag++; - srcidx++; - } - - if (wrknob < dstfrag->rf_nob) { - kibnal_rf_set(dstfrag, - kibnal_rf_addr(dstfrag) + wrknob, - dstfrag->rf_nob - wrknob); - } else { - dstfrag++; - dstidx++; - } - - tx->tx_nwrq++; - } - - if (rc < 0) /* no RDMA if completing with failure */ - tx->tx_nwrq = 0; -#endif - - ibmsg->ibm_u.completion.ibcm_status = rc; - ibmsg->ibm_u.completion.ibcm_cookie = dstcookie; - kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t)); - - return rc; -} - -void -kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn) -{ - spin_lock(&conn->ibc_lock); - kibnal_queue_tx_locked (tx, conn); - spin_unlock(&conn->ibc_lock); - - kibnal_check_sends(conn); -} - -void -kibnal_schedule_peer_arp (kib_peer_t *peer) -{ - unsigned long flags; - - LASSERT (peer->ibp_connecting != 0); - LASSERT (peer->ibp_arp_count > 0); - - kibnal_peer_addref(peer); /* extra ref for connd */ - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail (&peer->ibp_connd_list, &kibnal_data.kib_connd_peers); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); -} - -void -kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid) -{ - kib_peer_t *peer; - kib_conn_t *conn; - unsigned long flags; - rwlock_t *g_lock = &kibnal_data.kib_global_lock; - int retry; - int rc; - - /* If I get here, I've committed to send, so I complete the tx with - * failure on any problems */ - - LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */ - LASSERT (tx->tx_nwrq > 0); /* work items have been set up */ - - for (retry = 0; ; retry = 1) { - read_lock_irqsave(g_lock, flags); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) { - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - kibnal_conn_addref(conn); /* 1 ref for me... 
*/ - read_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...to here */ - return; - } - } - - /* Making one or more connections; I'll need a write lock... */ - read_unlock(g_lock); - write_lock(g_lock); - - peer = kibnal_find_peer_locked (nid); - if (peer != NULL) - break; - - write_unlock_irqrestore(g_lock, flags); - - if (retry) { - CERROR("Can't find peer %s\n", libcfs_nid2str(nid)); - - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - - rc = kibnal_add_persistent_peer(nid, LNET_NIDADDR(nid)); - if (rc != 0) { - CERROR("Can't add peer %s: %d\n", - libcfs_nid2str(nid), rc); - - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - } - - conn = kibnal_find_conn_locked (peer); - if (conn != NULL) { - /* Connection exists; queue message on it */ - kibnal_conn_addref(conn); /* 1 ref for me... */ - write_unlock_irqrestore(g_lock, flags); - - kibnal_queue_tx (tx, conn); - kibnal_conn_decref(conn); /* ...until here */ - return; - } - - if (peer->ibp_connecting == 0 && - peer->ibp_accepting == 0) { - if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */ - time_after_eq(jiffies, peer->ibp_reconnect_time))) { - write_unlock_irqrestore(g_lock, flags); - tx->tx_status = -EHOSTUNREACH; - tx->tx_waiting = 0; - kibnal_tx_done (tx); - return; - } - - peer->ibp_connecting = 1; - peer->ibp_arp_count = 1 + *kibnal_tunables.kib_arp_retries; - kibnal_schedule_peer_arp(peer); - } - - /* A connection is being established; queue the message... 
*/ - list_add_tail (&tx->tx_list, &peer->ibp_tx_queue); - - write_unlock_irqrestore(g_lock, flags); -} - -int -kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kib_msg_t *ibmsg; - kib_tx_t *tx; - int nob; - int rc; - - /* NB 'private' is different depending on what we're sending.... */ - - CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n", - payload_nob, payload_niov, libcfs_id2str(target)); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - - /* Thread context */ - LASSERT (!in_interrupt()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - switch (type) { - default: - LBUG(); - return (-EIO); - - case LNET_MSG_ACK: - LASSERT (payload_nob == 0); - break; - - case LNET_MSG_GET: - if (routing || target_is_router) - break; /* send IMMEDIATE */ - - /* is the REPLY message too small for RDMA? 
*/ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can allocate txd for GET to %s: \n", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.get.ibgm_hdr = *hdr; - ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie; - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - rc = kibnal_setup_rd_iov(tx, &ibmsg->ibm_u.get.ibgm_rd, - vv_acc_r_mem_write, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, - 0, lntmsg->msg_md->md_length); - else - rc = kibnal_setup_rd_kiov(tx, &ibmsg->ibm_u.get.ibgm_rd, - vv_acc_r_mem_write, - lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - if (rc != 0) { - CERROR("Can't setup GET sink for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kibnal_tx_done(tx); - return -EIO; - } - -#if IBNAL_USE_FMR - nob = sizeof(kib_get_msg_t); -#else - { - int n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag; - - nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]); - } -#endif - kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob); - - tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni, - lntmsg); - if (tx->tx_lntmsg[1] == NULL) { - CERROR("Can't create reply for GET -> %s\n", - libcfs_nid2str(target.nid)); - kibnal_tx_done(tx); - return -EIO; - } - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */ - tx->tx_waiting = 1; /* waiting for GET_DONE */ - kibnal_launch_tx(tx, target.nid); - return 0; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Is the payload small enough not to need RDMA? */ - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]); - if (nob <= IBNAL_MSG_SIZE) - break; /* send IMMEDIATE */ - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't allocate %s txd for %s\n", - type == LNET_MSG_PUT ? 
"PUT" : "REPLY", - libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - if (payload_kiov == NULL) - rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 0, - payload_niov, payload_iov, - payload_offset, payload_nob); - else - rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 0, - payload_niov, payload_kiov, - payload_offset, payload_nob); - if (rc != 0) { - CERROR("Can't setup PUT src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - kibnal_tx_done(tx); - return -EIO; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.putreq.ibprm_hdr = *hdr; - ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie; - kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_REQ, sizeof(kib_putreq_msg_t)); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */ - kibnal_launch_tx(tx, target.nid); - return 0; - } - - /* send IMMEDIATE */ - - LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]) - <= IBNAL_MSG_SIZE); - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR ("Can't send %d to %s: tx descs exhausted\n", - type, libcfs_nid2str(target.nid)); - return -ENOMEM; - } - - ibmsg = tx->tx_msg; - ibmsg->ibm_u.immediate.ibim_hdr = *hdr; - - if (payload_kiov != NULL) - lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_kiov, - payload_offset, payload_nob); - else - lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - payload_niov, payload_iov, - payload_offset, payload_nob); - - nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]); - kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - kibnal_launch_tx(tx, target.nid); - return 0; -} - -void -kibnal_reply (lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg) -{ - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - lnet_kiov_t *kiov = 
lntmsg->msg_kiov; - unsigned int offset = lntmsg->msg_offset; - unsigned int nob = lntmsg->msg_len; - kib_tx_t *tx; - int rc; - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't get tx for REPLY to %s\n", - libcfs_nid2str(target.nid)); - goto failed_0; - } - - if (nob == 0) - rc = 0; - else if (kiov == NULL) - rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 0, - niov, iov, offset, nob); - else - rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 0, - niov, kiov, offset, nob); - - if (rc != 0) { - CERROR("Can't setup GET src for %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE, nob, - &rx->rx_msg->ibm_u.get.ibgm_rd, - rx->rx_msg->ibm_u.get.ibgm_cookie); - if (rc < 0) { - CERROR("Can't setup rdma for GET from %s: %d\n", - libcfs_nid2str(target.nid), rc); - goto failed_1; - } - - if (rc == 0) { - /* No RDMA: local completion may happen now! */ - lnet_finalize(ni, lntmsg, 0); - } else { - /* RDMA: lnet_finalize(lntmsg) when it - * completes */ - tx->tx_lntmsg[0] = lntmsg; - } - - kibnal_queue_tx(tx, rx->rx_conn); - return; - - failed_1: - kibnal_tx_done(tx); - failed_0: - lnet_finalize(ni, lntmsg, -EIO); -} - -int -kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - void **new_private) -{ - kib_rx_t *rx = private; - kib_conn_t *conn = rx->rx_conn; - - if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) { - /* Can't block if RDMA completions need normal credits */ - LCONSOLE_ERROR("Dropping message from %s: no buffers free. 
" - "%s is running an old version of LNET that may " - "deadlock if messages wait for buffers)\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - return -EDEADLK; - } - - *new_private = private; - return 0; -} - -int -kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kib_rx_t *rx = private; - kib_msg_t *rxmsg = rx->rx_msg; - kib_conn_t *conn = rx->rx_conn; - kib_tx_t *tx; - kib_msg_t *txmsg; - int nob; - int post_cred = 1; - int rc = 0; - - LASSERT (mlen <= rlen); - LASSERT (!in_interrupt()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - switch (rxmsg->ibm_type) { - default: - LBUG(); - - case IBNAL_MSG_IMMEDIATE: - nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov(niov, kiov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - else - lnet_copy_flat2iov(niov, iov, offset, - IBNAL_MSG_SIZE, rxmsg, - offsetof(kib_msg_t, ibm_u.immediate.ibim_payload), - mlen); - lnet_finalize (ni, lntmsg, 0); - break; - - case IBNAL_MSG_PUT_REQ: - if (mlen == 0) { - lnet_finalize(ni, lntmsg, 0); - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - tx = kibnal_get_idle_tx(); - if (tx == NULL) { - CERROR("Can't allocate tx for %s\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid)); - /* Not replying will break the connection */ - rc = -ENOMEM; - break; - } - - txmsg = tx->tx_msg; - if (kiov == NULL) - rc = kibnal_setup_rd_iov(tx, - &txmsg->ibm_u.putack.ibpam_rd, - vv_acc_r_mem_write, - niov, iov, offset, mlen); - 
else - rc = kibnal_setup_rd_kiov(tx, - &txmsg->ibm_u.putack.ibpam_rd, - vv_acc_r_mem_write, - niov, kiov, offset, mlen); - if (rc != 0) { - CERROR("Can't setup PUT sink for %s: %d\n", - libcfs_nid2str(conn->ibc_peer->ibp_nid), rc); - kibnal_tx_done(tx); - /* tell peer it's over */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc, - rxmsg->ibm_u.putreq.ibprm_cookie); - break; - } - - txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie; - txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie; -#if IBNAL_USE_FMR - nob = sizeof(kib_putack_msg_t); -#else - { - int n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag; - - nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]); - } -#endif - kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob); - - tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */ - tx->tx_waiting = 1; /* waiting for PUT_DONE */ - kibnal_queue_tx(tx, conn); - - if (conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) - post_cred = 0; /* peer still owns 'rx' for sending PUT_DONE */ - break; - - case IBNAL_MSG_GET_REQ: - if (lntmsg != NULL) { - /* Optimized GET; RDMA lntmsg's payload */ - kibnal_reply(ni, rx, lntmsg); - } else { - /* GET didn't match anything */ - kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE, - -ENODATA, - rxmsg->ibm_u.get.ibgm_cookie); - } - break; - } - - kibnal_post_rx(rx, post_cred, 0); - return rc; -} - -int -kibnal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kibnal_data.kib_nthreads); - return (0); -} - -void -kibnal_thread_fini (void) -{ - atomic_dec (&kibnal_data.kib_nthreads); -} - -void -kibnal_peer_alive (kib_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->ibp_last_alive = cfs_time_current(); - mb(); -} - -void -kibnal_peer_notify (kib_peer_t *peer) -{ - time_t last_alive = 0; - int error = 0; - unsigned long flags; - - 
read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (list_empty(&peer->ibp_conns) && - peer->ibp_accepting == 0 && - peer->ibp_connecting == 0 && - peer->ibp_error != 0) { - error = peer->ibp_error; - peer->ibp_error = 0; - - last_alive = cfs_time_current_sec() - - cfs_duration_sec(cfs_time_current() - - peer->ibp_last_alive); - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (error != 0) - lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive); -} - -void -kibnal_schedule_conn (kib_conn_t *conn) -{ - unsigned long flags; - - kibnal_conn_addref(conn); /* ++ref for connd */ - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns); - wake_up (&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); -} - -void -kibnal_close_conn_locked (kib_conn_t *conn, int error) -{ - /* This just does the immediate housekeeping. 'error' is zero for a - * normal shutdown which can happen only after the connection has been - * established. If the connection is established, schedule the - * connection to be finished off by the connd. Otherwise the connd is - * already dealing with it (either to set it up or tear it down). 
- * Caller holds kib_global_lock exclusively in irq context */ - kib_peer_t *peer = conn->ibc_peer; - - LASSERT (error != 0 || conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - if (error != 0 && conn->ibc_comms_error == 0) - conn->ibc_comms_error = error; - - if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) - return; /* already being handled */ - - /* NB Can't take ibc_lock here (could be in IRQ context), without - * risking deadlock, so access to ibc_{tx_queue,active_txs} is racey */ - - if (error == 0 && - list_empty(&conn->ibc_tx_queue) && - list_empty(&conn->ibc_tx_queue_rsrvd) && - list_empty(&conn->ibc_tx_queue_nocred) && - list_empty(&conn->ibc_active_txs)) { - CDEBUG(D_NET, "closing conn to %s" - " rx# "LPD64" tx# "LPD64"\n", - libcfs_nid2str(peer->ibp_nid), - conn->ibc_txseq, conn->ibc_rxseq); - } else { - CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s" - " rx# "LPD64" tx# "LPD64"\n", - libcfs_nid2str(peer->ibp_nid), error, - list_empty(&conn->ibc_tx_queue) ? "" : "(sending)", - list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)", - list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)", - list_empty(&conn->ibc_active_txs) ? 
"" : "(waiting)", - conn->ibc_txseq, conn->ibc_rxseq); - } - - list_del (&conn->ibc_list); - - if (list_empty (&peer->ibp_conns)) { /* no more conns */ - if (peer->ibp_persistence == 0 && /* non-persistent peer */ - kibnal_peer_active(peer)) /* still in peer table */ - kibnal_unlink_peer_locked (peer); - - /* set/clear error on last conn */ - peer->ibp_error = conn->ibc_comms_error; - } - - kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECT1); - - kibnal_schedule_conn(conn); - kibnal_conn_decref(conn); /* lose ibc_list's ref */ -} - -void -kibnal_close_conn (kib_conn_t *conn, int error) -{ - unsigned long flags; - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - kibnal_close_conn_locked (conn, error); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_handle_early_rxs(kib_conn_t *conn) -{ - unsigned long flags; - kib_rx_t *rx; - - LASSERT (!in_interrupt()); - LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - while (!list_empty(&conn->ibc_early_rxs)) { - rx = list_entry(conn->ibc_early_rxs.next, - kib_rx_t, rx_list); - list_del(&rx->rx_list); - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_handle_rx(rx); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_abort_txs(kib_conn_t *conn, struct list_head *txs) -{ - LIST_HEAD (zombies); - struct list_head *tmp; - struct list_head *nxt; - kib_tx_t *tx; - - spin_lock(&conn->ibc_lock); - - list_for_each_safe (tmp, nxt, txs) { - tx = list_entry (tmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - tx->tx_status = -ECONNABORTED; - tx->tx_queued = 0; - tx->tx_waiting = 0; - - if (tx->tx_sending == 0) { - list_del (&tx->tx_list); - list_add (&tx->tx_list, 
&zombies); - } - } - - spin_unlock(&conn->ibc_lock); - - kibnal_txlist_done(&zombies, -ECONNABORTED); -} - -void -kibnal_conn_disconnected(kib_conn_t *conn) -{ - /* I'm the connd */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state >= IBNAL_CONN_INIT); - - kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTED); - - /* move QP to error state to make posted work items complete */ - kibnal_set_qp_state(conn, vv_qp_state_error); - - /* Complete all tx descs not waiting for sends to complete. - * NB we should be safe from RDMA now that the QP has changed state */ - - kibnal_abort_txs(conn, &conn->ibc_tx_queue); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd); - kibnal_abort_txs(conn, &conn->ibc_tx_queue_nocred); - kibnal_abort_txs(conn, &conn->ibc_active_txs); - - kibnal_handle_early_rxs(conn); - - kibnal_peer_notify(conn->ibc_peer); -} - -void -kibnal_peer_connect_failed (kib_peer_t *peer, int active, int error) -{ - LIST_HEAD (zombies); - unsigned long flags; - - /* Only the connd creates conns => single threaded */ - LASSERT (error != 0); - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (active) { - LASSERT (peer->ibp_connecting != 0); - peer->ibp_connecting--; - } else { - LASSERT (peer->ibp_accepting != 0); - peer->ibp_accepting--; - } - - if (peer->ibp_connecting != 0 || - peer->ibp_accepting != 0) { - /* another connection attempt under way (loopback?)... 
*/ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return; - } - - if (list_empty(&peer->ibp_conns)) { - /* Say when active connection can be re-attempted */ - peer->ibp_reconnect_interval *= 2; - peer->ibp_reconnect_interval = - MAX(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_min_reconnect_interval); - peer->ibp_reconnect_interval = - MIN(peer->ibp_reconnect_interval, - *kibnal_tunables.kib_max_reconnect_interval); - - peer->ibp_reconnect_time = jiffies + - peer->ibp_reconnect_interval * HZ; - - /* Take peer's blocked transmits to complete with error */ - list_add(&zombies, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - if (kibnal_peer_active(peer) && - (peer->ibp_persistence == 0)) { - /* failed connection attempt on non-persistent peer */ - kibnal_unlink_peer_locked (peer); - } - - peer->ibp_error = error; - } else { - /* Can't have blocked transmits if there are connections */ - LASSERT (list_empty(&peer->ibp_tx_queue)); - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - kibnal_peer_notify(peer); - - if (list_empty (&zombies)) - return; - - CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_txlist_done(&zombies, -EHOSTUNREACH); -} - -void -kibnal_reject(cm_cep_handle_t cep, int why) -{ - static cm_reject_data_t rejs[3]; - cm_reject_data_t *rej = &rejs[why]; - - LASSERT (why >= 0 && why < sizeof(rejs)/sizeof(rejs[0])); - - /* If I wasn't so lazy, I'd initialise this only once; it's effective - * read-only */ - rej->reason = cm_rej_code_usr_rej; - rej->priv_data[0] = (IBNAL_MSG_MAGIC) & 0xff; - rej->priv_data[1] = (IBNAL_MSG_MAGIC >> 8) & 0xff; - rej->priv_data[2] = (IBNAL_MSG_MAGIC >> 16) & 0xff; - rej->priv_data[3] = (IBNAL_MSG_MAGIC >> 24) & 0xff; - rej->priv_data[4] = (IBNAL_MSG_VERSION) & 0xff; - rej->priv_data[5] = (IBNAL_MSG_VERSION >> 8) & 0xff; - rej->priv_data[6] = why; - - cm_reject(cep, rej); -} - -void 
-kibnal_connreq_done(kib_conn_t *conn, int active, int status) -{ - struct list_head txs; - kib_peer_t *peer = conn->ibc_peer; - unsigned long flags; - kib_tx_t *tx; - - CDEBUG(D_NET,"%d\n", status); - - /* Only the connd creates conns => single threaded */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED); - - if (active) { - LASSERT (peer->ibp_connecting > 0); - } else { - LASSERT (peer->ibp_accepting > 0); - } - - LIBCFS_FREE(conn->ibc_connvars, sizeof(*conn->ibc_connvars)); - conn->ibc_connvars = NULL; - - if (status != 0) { - /* failed to establish connection */ - switch (conn->ibc_state) { - default: - LBUG(); - - case IBNAL_CONN_ACTIVE_CHECK_REPLY: - /* got a connection reply but failed checks */ - LASSERT (active); - kibnal_reject(conn->ibc_cep, IBNAL_REJECT_FATAL); - break; - - case IBNAL_CONN_ACTIVE_CONNECT: - LASSERT (active); - cm_cancel(conn->ibc_cep); - cfs_pause(cfs_time_seconds(1)/10); - /* cm_connect() failed immediately or - * callback returned failure */ - break; - - case IBNAL_CONN_ACTIVE_ARP: - LASSERT (active); - /* ibat_get_ib_data() failed immediately - * or callback returned failure */ - break; - - case IBNAL_CONN_INIT: - break; - - case IBNAL_CONN_PASSIVE_WAIT: - LASSERT (!active); - /* cm_accept callback returned failure */ - break; - } - - kibnal_peer_connect_failed(conn->ibc_peer, active, status); - kibnal_conn_disconnected(conn); - return; - } - - /* connection established */ - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (active) { - LASSERT(conn->ibc_state == IBNAL_CONN_ACTIVE_RTU); - } else { - LASSERT(conn->ibc_state == IBNAL_CONN_PASSIVE_WAIT); - } - - conn->ibc_last_send = jiffies; - kibnal_set_conn_state(conn, IBNAL_CONN_ESTABLISHED); - kibnal_peer_alive(peer); - - /* Add conn to peer's list and nuke any dangling conns from a different - * peer instance... 
*/ - kibnal_conn_addref(conn); /* +1 ref for ibc_list */ - list_add(&conn->ibc_list, &peer->ibp_conns); - kibnal_close_stale_conns_locked (conn->ibc_peer, - conn->ibc_incarnation); - - if (!kibnal_peer_active(peer) || /* peer has been deleted */ - conn->ibc_comms_error != 0 || /* comms error */ - conn->ibc_disconnect) { /* need to disconnect */ - - /* start to shut down connection */ - kibnal_close_conn_locked(conn, -ECONNABORTED); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - kibnal_peer_connect_failed(peer, active, -ECONNABORTED); - return; - } - - if (active) - peer->ibp_connecting--; - else - peer->ibp_accepting--; - - /* grab pending txs while I have the lock */ - list_add(&txs, &peer->ibp_tx_queue); - list_del_init(&peer->ibp_tx_queue); - - peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */ - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - /* Schedule blocked txs */ - spin_lock (&conn->ibc_lock); - while (!list_empty (&txs)) { - tx = list_entry (txs.next, kib_tx_t, tx_list); - list_del (&tx->tx_list); - - kibnal_queue_tx_locked (tx, conn); - } - spin_unlock (&conn->ibc_lock); - kibnal_check_sends (conn); - - /* schedule blocked rxs */ - kibnal_handle_early_rxs(conn); -} - -void -kibnal_cm_callback(cm_cep_handle_t cep, cm_conn_data_t *cmdata, void *arg) -{ - static cm_dreply_data_t drep; /* just zeroed space */ - - kib_conn_t *conn = (kib_conn_t *)arg; - unsigned long flags; - - /* CAVEAT EMPTOR: tasklet context */ - - switch (cmdata->status) { - default: - LBUG(); - - case cm_event_disconn_request: - /* IBNAL_CONN_ACTIVE_RTU: gets closed in kibnal_connreq_done - * IBNAL_CONN_ESTABLISHED: I start it closing - * otherwise: it's closing anyway */ - cm_disconnect(conn->ibc_cep, NULL, &drep); - cm_cancel(conn->ibc_cep); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - LASSERT (!conn->ibc_disconnect); - conn->ibc_disconnect = 1; - - switch (conn->ibc_state) { - default: - LBUG(); - - case 
IBNAL_CONN_ACTIVE_RTU: - /* kibnal_connreq_done is getting there; It'll see - * ibc_disconnect set... */ - break; - - case IBNAL_CONN_ESTABLISHED: - /* kibnal_connreq_done got there already; get - * disconnect going... */ - kibnal_close_conn_locked(conn, 0); - break; - - case IBNAL_CONN_DISCONNECT1: - /* kibnal_disconnect_conn is getting there; It'll see - * ibc_disconnect set... */ - break; - - case IBNAL_CONN_DISCONNECT2: - /* kibnal_disconnect_conn got there already; complete - * the disconnect. */ - kibnal_schedule_conn(conn); - break; - } - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - break; - - case cm_event_disconn_timeout: - case cm_event_disconn_reply: - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT2); - LASSERT (!conn->ibc_disconnect); - conn->ibc_disconnect = 1; - - /* kibnal_disconnect_conn sent the disconnect request. */ - kibnal_schedule_conn(conn); - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - break; - - case cm_event_connected: - case cm_event_conn_timeout: - case cm_event_conn_reject: - LASSERT (conn->ibc_state == IBNAL_CONN_PASSIVE_WAIT); - conn->ibc_connvars->cv_conndata = *cmdata; - - kibnal_schedule_conn(conn); - break; - } - - kibnal_conn_decref(conn); /* lose my ref */ -} - -void -kibnal_check_passive_wait(kib_conn_t *conn) -{ - int rc; - - switch (conn->ibc_connvars->cv_conndata.status) { - default: - LBUG(); - - case cm_event_connected: - kibnal_conn_addref(conn); /* ++ ref for CM callback */ - rc = kibnal_set_qp_state(conn, vv_qp_state_rts); - if (rc != 0) - conn->ibc_comms_error = rc; - /* connection _has_ been established; it's just that we've had - * an error immediately... 
*/ - kibnal_connreq_done(conn, 0, 0); - break; - - case cm_event_conn_timeout: - kibnal_connreq_done(conn, 0, -ETIMEDOUT); - break; - - case cm_event_conn_reject: - kibnal_connreq_done(conn, 0, -ECONNRESET); - break; - } -} - -void -kibnal_recv_connreq(cm_cep_handle_t *cep, cm_request_data_t *cmreq) -{ - static kib_msg_t txmsg; - static kib_msg_t rxmsg; - static cm_reply_data_t reply; - - kib_conn_t *conn = NULL; - int rc = 0; - int reason; - int rxmsgnob; - rwlock_t *g_lock = &kibnal_data.kib_global_lock; - kib_peer_t *peer; - kib_peer_t *peer2; - unsigned long flags; - kib_connvars_t *cv; - cm_return_t cmrc; - vv_return_t vvrc; - - /* I'm the connd executing in thread context - * No concurrency problems with static data! */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - if (cmreq->sid != (__u64)(*kibnal_tunables.kib_service_number)) { - CERROR(LPX64" != IBNAL_SERVICE_NUMBER("LPX64")\n", - cmreq->sid, (__u64)(*kibnal_tunables.kib_service_number)); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - /* copy into rxmsg to avoid alignment issues */ - rxmsgnob = MIN(cm_REQ_priv_data_len, sizeof(rxmsg)); - memcpy(&rxmsg, cmreq->priv_data, rxmsgnob); - - rc = kibnal_unpack_msg(&rxmsg, 0, rxmsgnob); - if (rc != 0) { - /* SILENT! 
kibnal_unpack_msg() complains if required */ - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (rxmsg.ibm_version != IBNAL_MSG_VERSION) - CWARN("Connection from %s: old protocol version 0x%x\n", - libcfs_nid2str(rxmsg.ibm_srcnid), rxmsg.ibm_version); - - if (rxmsg.ibm_type != IBNAL_MSG_CONNREQ) { - CERROR("Unexpected connreq msg type: %x from %s\n", - rxmsg.ibm_type, libcfs_nid2str(rxmsg.ibm_srcnid)); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (!lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - rxmsg.ibm_dstnid)) { - CERROR("Can't accept %s: bad dst nid %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid), - libcfs_nid2str(rxmsg.ibm_dstnid)); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (rxmsg.ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) { - CERROR("Can't accept %s: incompatible queue depth %d (%d wanted)\n", - libcfs_nid2str(rxmsg.ibm_srcnid), - rxmsg.ibm_u.connparams.ibcp_queue_depth, - IBNAL_MSG_QUEUE_SIZE); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (rxmsg.ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE) { - CERROR("Can't accept %s: message size %d too big (%d max)\n", - libcfs_nid2str(rxmsg.ibm_srcnid), - rxmsg.ibm_u.connparams.ibcp_max_msg_size, - IBNAL_MSG_SIZE); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - if (rxmsg.ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) { - CERROR("Can't accept %s: max frags %d too big (%d max)\n", - libcfs_nid2str(rxmsg.ibm_srcnid), - rxmsg.ibm_u.connparams.ibcp_max_frags, - IBNAL_MAX_RDMA_FRAGS); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - /* assume 'rxmsg.ibm_srcnid' is a new peer; create */ - rc = kibnal_create_peer (&peer, rxmsg.ibm_srcnid); - if (rc != 0) { - CERROR("Can't create peer for %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid)); - reason = IBNAL_REJECT_NO_RESOURCES; - goto reject; - } - - write_lock_irqsave(g_lock, flags); - - peer2 = kibnal_find_peer_locked(rxmsg.ibm_srcnid); - if (peer2 != NULL) { - /* tie-break connection race in 
favour of the higher NID */ - if (peer2->ibp_connecting != 0 && - rxmsg.ibm_srcnid < kibnal_data.kib_ni->ni_nid) { - write_unlock_irqrestore(g_lock, flags); - - CWARN("Conn race %s\n", - libcfs_nid2str(peer2->ibp_nid)); - - kibnal_peer_decref(peer); - reason = IBNAL_REJECT_CONN_RACE; - goto reject; - } - - peer2->ibp_accepting++; - kibnal_peer_addref(peer2); - - write_unlock_irqrestore(g_lock, flags); - kibnal_peer_decref(peer); - peer = peer2; - } else { - /* Brand new peer */ - LASSERT (peer->ibp_accepting == 0); - peer->ibp_accepting = 1; - - kibnal_peer_addref(peer); - list_add_tail(&peer->ibp_list, kibnal_nid2peerlist(rxmsg.ibm_srcnid)); - - write_unlock_irqrestore(g_lock, flags); - } - - conn = kibnal_create_conn(cep); - if (conn == NULL) { - CERROR("Can't create conn for %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid)); - kibnal_peer_connect_failed(peer, 0, -ENOMEM); - kibnal_peer_decref(peer); - reason = IBNAL_REJECT_NO_RESOURCES; - goto reject; - } - - conn->ibc_version = rxmsg.ibm_version; - - conn->ibc_peer = peer; /* conn takes over my ref */ - conn->ibc_incarnation = rxmsg.ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBNAL_RX_MSGS); - - cv = conn->ibc_connvars; - - cv->cv_txpsn = cmreq->cep_data.start_psn; - cv->cv_remote_qpn = cmreq->cep_data.qpn; - cv->cv_path = cmreq->path_data.path; - cv->cv_rnr_count = cmreq->cep_data.rtr_retry_cnt; - // XXX cmreq->cep_data.retry_cnt; - cv->cv_port = cmreq->cep_data.local_port_num; - - vvrc = gid2gid_index(kibnal_data.kib_hca, cv->cv_port, - &cv->cv_path.sgid, &cv->cv_sgid_index); - if (vvrc != vv_return_ok) { - CERROR("gid2gid_index failed for %s: %d\n", - libcfs_nid2str(rxmsg.ibm_srcnid), vvrc); - rc = -EIO; - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - vvrc = pkey2pkey_index(kibnal_data.kib_hca, cv->cv_port, - cv->cv_path.pkey, &cv->cv_pkey_index); - if (vvrc != vv_return_ok) { - 
CERROR("pkey2pkey_index failed for %s: %d\n", - libcfs_nid2str(rxmsg.ibm_srcnid), vvrc); - rc = -EIO; - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_init); - if (rc != 0) { - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - rc = kibnal_post_receives(conn); - if (rc != 0) { - CERROR("Can't post receives for %s\n", - libcfs_nid2str(rxmsg.ibm_srcnid)); - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_rtr); - if (rc != 0) { - reason = IBNAL_REJECT_FATAL; - goto reject; - } - - memset(&reply, 0, sizeof(reply)); - reply.qpn = cv->cv_local_qpn; - reply.qkey = IBNAL_QKEY; - reply.start_psn = cv->cv_rxpsn; - reply.arb_initiator_depth = IBNAL_ARB_INITIATOR_DEPTH; - reply.arb_resp_res = IBNAL_ARB_RESP_RES; - reply.failover_accepted = IBNAL_FAILOVER_ACCEPTED; - reply.rnr_retry_count = cv->cv_rnr_count; - reply.targ_ack_delay = kibnal_data.kib_hca_attrs.ack_delay; - - /* setup txmsg... */ - memset(&txmsg, 0, sizeof(txmsg)); - kibnal_init_msg(&txmsg, IBNAL_MSG_CONNACK, - sizeof(txmsg.ibm_u.connparams)); - LASSERT (txmsg.ibm_nob <= cm_REP_priv_data_len); - txmsg.ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - txmsg.ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE; - txmsg.ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS; - kibnal_pack_msg(&txmsg, conn->ibc_version, - 0, rxmsg.ibm_srcnid, rxmsg.ibm_srcstamp, 0); - - /* ...and copy into reply to avoid alignment issues */ - memcpy(&reply.priv_data, &txmsg, txmsg.ibm_nob); - - kibnal_set_conn_state(conn, IBNAL_CONN_PASSIVE_WAIT); - - cmrc = cm_accept(conn->ibc_cep, &reply, NULL, - kibnal_cm_callback, conn); - - if (cmrc == cm_stat_success) - return; /* callback has got my ref on conn */ - - /* back out state change (no callback happening) */ - kibnal_set_conn_state(conn, IBNAL_CONN_INIT); - rc = -EIO; - reason = IBNAL_REJECT_FATAL; - - reject: - CDEBUG(D_NET, "Rejecting connreq from %s\n", - 
libcfs_nid2str(rxmsg.ibm_srcnid)); - - kibnal_reject(cep, reason); - - if (conn != NULL) { - LASSERT (rc != 0); - kibnal_connreq_done(conn, 0, rc); - } else { - cm_destroy_cep(cep); - } -} - -void -kibnal_listen_callback(cm_cep_handle_t cep, cm_conn_data_t *data, void *arg) -{ - cm_request_data_t *cmreq = &data->data.request; - kib_pcreq_t *pcr; - unsigned long flags; - - LASSERT (arg == NULL); - - if (data->status != cm_event_conn_request) { - CERROR("status %d is not cm_event_conn_request\n", - data->status); - return; - } - - LIBCFS_ALLOC_ATOMIC(pcr, sizeof(*pcr)); - if (pcr == NULL) { - CERROR("Can't allocate passive connreq\n"); - - kibnal_reject(cep, IBNAL_REJECT_NO_RESOURCES); - cm_destroy_cep(cep); - return; - } - - pcr->pcr_cep = cep; - pcr->pcr_cmreq = *cmreq; - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - list_add_tail(&pcr->pcr_list, &kibnal_data.kib_connd_pcreqs); - wake_up(&kibnal_data.kib_connd_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); -} - - -void -kibnal_active_connect_callback (cm_cep_handle_t cep, cm_conn_data_t *cd, - void *arg) -{ - /* CAVEAT EMPTOR: tasklet context */ - kib_conn_t *conn = (kib_conn_t *)arg; - kib_connvars_t *cv = conn->ibc_connvars; - - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT); - cv->cv_conndata = *cd; - - kibnal_schedule_conn(conn); - kibnal_conn_decref(conn); -} - -void -kibnal_connect_conn (kib_conn_t *conn) -{ - static cm_request_data_t cmreq; - static kib_msg_t msg; - - kib_connvars_t *cv = conn->ibc_connvars; - kib_peer_t *peer = conn->ibc_peer; - cm_return_t cmrc; - - /* Only called by connd => statics OK */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP); - - memset(&cmreq, 0, sizeof(cmreq)); - - cmreq.sid = (__u64)(*kibnal_tunables.kib_service_number); - - cmreq.cep_data.ca_guid = kibnal_data.kib_hca_attrs.guid; - cmreq.cep_data.qpn = cv->cv_local_qpn; - cmreq.cep_data.retry_cnt = 
*kibnal_tunables.kib_retry_cnt; - cmreq.cep_data.rtr_retry_cnt = *kibnal_tunables.kib_rnr_cnt; - cmreq.cep_data.start_psn = cv->cv_rxpsn; - cmreq.cep_data.end_to_end_flow_ctrl = IBNAL_EE_FLOW_CNT; - // XXX ack_timeout? - // offered_resp_res - // offered_initiator_depth - - cmreq.path_data.subn_local = IBNAL_LOCAL_SUB; - cmreq.path_data.path = cv->cv_path; - - /* setup msg... */ - memset(&msg, 0, sizeof(msg)); - kibnal_init_msg(&msg, IBNAL_MSG_CONNREQ, sizeof(msg.ibm_u.connparams)); - LASSERT(msg.ibm_nob <= cm_REQ_priv_data_len); - msg.ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE; - msg.ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE; - msg.ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS; - kibnal_pack_msg(&msg, conn->ibc_version, 0, peer->ibp_nid, 0, 0); - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 1) != 0) { - msg.ibm_version++; - the_lnet.ln_testprotocompat &= ~1; - } - if ((the_lnet.ln_testprotocompat & 2) != 0) { - msg.ibm_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~2; - } - LNET_UNLOCK(); - } - - /* ...and copy into cmreq to avoid alignment issues */ - memcpy(&cmreq.priv_data, &msg, msg.ibm_nob); - - CDEBUG(D_NET, "Connecting %p to %s\n", conn, - libcfs_nid2str(peer->ibp_nid)); - - kibnal_conn_addref(conn); /* ++ref for CM callback */ - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CONNECT); - - cmrc = cm_connect(conn->ibc_cep, &cmreq, - kibnal_active_connect_callback, conn); - if (cmrc == cm_stat_success) { - CDEBUG(D_NET, "connection REQ sent to %s\n", - libcfs_nid2str(peer->ibp_nid)); - return; - } - - CERROR ("Connect %s failed: %d\n", libcfs_nid2str(peer->ibp_nid), cmrc); - kibnal_conn_decref(conn); /* drop callback's ref */ - kibnal_connreq_done(conn, 1, -EHOSTUNREACH); -} - -void -kibnal_reconnect (kib_conn_t *conn, int why) -{ - kib_peer_t *peer = conn->ibc_peer; - int retry; - unsigned long flags; - cm_return_t cmrc; - 
cm_cep_handle_t cep; - - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT); - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - LASSERT (peer->ibp_connecting > 0); /* 'conn' at least */ - - /* retry connection if it's still needed and no other connection - * attempts (active or passive) are in progress. - * Immediate reconnect is required, so I don't even look at the - * reconnection timeout etc */ - - retry = (!list_empty(&peer->ibp_tx_queue) && - peer->ibp_connecting == 1 && - peer->ibp_accepting == 0); - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - if (!retry) { - kibnal_connreq_done(conn, 1, why); - return; - } - - cep = cm_create_cep(cm_cep_transp_rc); - if (cep == NULL) { - CERROR("Can't create new CEP\n"); - kibnal_connreq_done(conn, 1, -ENOMEM); - return; - } - - cmrc = cm_cancel(conn->ibc_cep); - LASSERT (cmrc == cm_stat_success); - cmrc = cm_destroy_cep(conn->ibc_cep); - LASSERT (cmrc == cm_stat_success); - - conn->ibc_cep = cep; - - /* reuse conn; no need to peer->ibp_connecting++ */ - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_ARP); - kibnal_connect_conn(conn); -} - -void -kibnal_check_connreply (kib_conn_t *conn) -{ - static cm_rtu_data_t rtu; - static kib_msg_t msg; - - kib_connvars_t *cv = conn->ibc_connvars; - cm_reply_data_t *reply = &cv->cv_conndata.data.reply; - kib_peer_t *peer = conn->ibc_peer; - int msgnob; - cm_return_t cmrc; - unsigned long flags; - int rc; - - /* Only called by connd => statics OK */ - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_CONNECT); - - if (cv->cv_conndata.status == cm_event_conn_reply) { - cv->cv_remote_qpn = reply->qpn; - cv->cv_txpsn = reply->start_psn; - // XXX reply->targ_ack_delay; - cv->cv_rnr_count = reply->rnr_retry_count; - - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CHECK_REPLY); - - /* copy into msg to avoid alignment issues */ - msgnob = MIN(cm_REP_priv_data_len, sizeof(msg)); - 
memcpy(&msg, &reply->priv_data, msgnob); - - rc = kibnal_unpack_msg(&msg, conn->ibc_version, msgnob); - if (rc != 0) { - CERROR("Can't unpack reply from %s\n", - libcfs_nid2str(peer->ibp_nid)); - kibnal_connreq_done(conn, 1, rc); - return; - } - - if (msg.ibm_type != IBNAL_MSG_CONNACK ) { - CERROR("Unexpected message type %d from %s\n", - msg.ibm_type, libcfs_nid2str(peer->ibp_nid)); - kibnal_connreq_done(conn, 1, -EPROTO); - return; - } - - if (msg.ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE) { - CERROR("%s has incompatible queue depth %d(%d wanted)\n", - libcfs_nid2str(peer->ibp_nid), - msg.ibm_u.connparams.ibcp_queue_depth, - IBNAL_MSG_QUEUE_SIZE); - kibnal_connreq_done(conn, 1, -EPROTO); - return; - } - - if (msg.ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE) { - CERROR("%s max message size %d too big (%d max)\n", - libcfs_nid2str(peer->ibp_nid), - msg.ibm_u.connparams.ibcp_max_msg_size, - IBNAL_MSG_SIZE); - kibnal_connreq_done(conn, 1, -EPROTO); - return; - } - - if (msg.ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) { - CERROR("%s max frags %d too big (%d max)\n", - libcfs_nid2str(peer->ibp_nid), - msg.ibm_u.connparams.ibcp_max_frags, - IBNAL_MAX_RDMA_FRAGS); - kibnal_connreq_done(conn, 1, -EPROTO); - return; - } - - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - if (lnet_ptlcompat_matchnid(kibnal_data.kib_ni->ni_nid, - msg.ibm_dstnid) && - msg.ibm_dststamp == kibnal_data.kib_incarnation) - rc = 0; - else - rc = -ESTALE; - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - if (rc != 0) { - CERROR("Stale connection reply from %s\n", - libcfs_nid2str(peer->ibp_nid)); - kibnal_connreq_done(conn, 1, rc); - return; - } - - conn->ibc_incarnation = msg.ibm_srcstamp; - conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE; - conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE; - LASSERT (conn->ibc_credits + conn->ibc_reserved_credits - <= IBNAL_RX_MSGS); - - rc = kibnal_post_receives(conn); - if (rc != 0) { - CERROR("Can't post 
receives for %s\n", - libcfs_nid2str(peer->ibp_nid)); - kibnal_connreq_done(conn, 1, rc); - return; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_rtr); - if (rc != 0) { - kibnal_connreq_done(conn, 1, rc); - return; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_rts); - if (rc != 0) { - kibnal_connreq_done(conn, 1, rc); - return; - } - - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_RTU); - kibnal_conn_addref(conn); /* ++for CM callback */ - - memset(&rtu, 0, sizeof(rtu)); - cmrc = cm_accept(conn->ibc_cep, NULL, &rtu, - kibnal_cm_callback, conn); - if (cmrc == cm_stat_success) { - /* Now I'm racing with disconnect signalled by - * kibnal_cm_callback */ - kibnal_connreq_done(conn, 1, 0); - return; - } - - CERROR("cm_accept %s failed: %d\n", - libcfs_nid2str(peer->ibp_nid), cmrc); - /* Back out of RTU: no callback coming */ - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_CHECK_REPLY); - kibnal_conn_decref(conn); - kibnal_connreq_done(conn, 1, -EIO); - return; - } - - if (cv->cv_conndata.status == cm_event_conn_reject) { - - if (cv->cv_conndata.data.reject.reason == cm_rej_code_usr_rej) { - unsigned char *bytes = - cv->cv_conndata.data.reject.priv_data; - int magic = (bytes[0]) | - (bytes[1] << 8) | - (bytes[2] << 16) | - (bytes[3] << 24); - int version = (bytes[4]) | - (bytes[5] << 8); - int why = (bytes[6]); - - /* Expected proto/version: she just doesn't like me (or - * ran out of resources) */ - if (magic == IBNAL_MSG_MAGIC && - version == conn->ibc_version) { - CERROR("conn -> %s rejected: fatal error %d\n", - libcfs_nid2str(peer->ibp_nid), why); - - if (why == IBNAL_REJECT_CONN_RACE) - kibnal_reconnect(conn, -EALREADY); - else - kibnal_connreq_done(conn, 1, -ECONNREFUSED); - return; - } - - /* Fail unless it's worth retrying with an old proto - * version */ - if (!(magic == IBNAL_MSG_MAGIC && - version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD && - conn->ibc_version == IBNAL_MSG_VERSION)) { - CERROR("conn -> %s rejected: bad protocol " - "magic/ver %08x/%x 
why %d\n", - libcfs_nid2str(peer->ibp_nid), - magic, version, why); - - kibnal_connreq_done(conn, 1, -ECONNREFUSED); - return; - } - - conn->ibc_version = version; - CWARN ("Connection to %s refused: " - "retrying with old protocol version 0x%x\n", - libcfs_nid2str(peer->ibp_nid), version); - - kibnal_reconnect(conn, -ECONNREFUSED); - return; - } else if (cv->cv_conndata.data.reject.reason == - cm_rej_code_stale_conn) { - - CWARN ("conn -> %s stale: retrying\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_reconnect(conn, -ESTALE); - return; - } else { - CDEBUG(D_NETERROR, "conn -> %s rejected: reason %d\n", - libcfs_nid2str(peer->ibp_nid), - cv->cv_conndata.data.reject.reason); - kibnal_connreq_done(conn, 1, -ECONNREFUSED); - return; - } - /* NOT REACHED */ - } - - CDEBUG(D_NETERROR, "conn -> %s failed: %d\n", - libcfs_nid2str(peer->ibp_nid), cv->cv_conndata.status); - kibnal_connreq_done(conn, 1, -ECONNABORTED); -} - -void -kibnal_arp_done (kib_conn_t *conn) -{ - kib_peer_t *peer = conn->ibc_peer; - kib_connvars_t *cv = conn->ibc_connvars; - ibat_arp_data_t *arp = &cv->cv_arp; - ib_path_record_v2_t *path = &cv->cv_path; - vv_return_t vvrc; - int rc; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP); - LASSERT (peer->ibp_arp_count > 0); - - if (cv->cv_arprc != ibat_stat_ok) { - CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed: %d\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), - cv->cv_arprc); - goto failed; - } - - if ((arp->mask & IBAT_PRI_PATH_VALID) != 0) { - CDEBUG(D_NET, "Got valid path for %s\n", - libcfs_nid2str(peer->ibp_nid)); - - *path = *arp->primary_path; - - vvrc = base_gid2port_num(kibnal_data.kib_hca, &path->sgid, - &cv->cv_port); - if (vvrc != vv_return_ok) { - CWARN("base_gid2port_num failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - vvrc = 
gid2gid_index(kibnal_data.kib_hca, cv->cv_port, - &path->sgid, &cv->cv_sgid_index); - if (vvrc != vv_return_ok) { - CWARN("gid2gid_index failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - vvrc = pkey2pkey_index(kibnal_data.kib_hca, cv->cv_port, - path->pkey, &cv->cv_pkey_index); - if (vvrc != vv_return_ok) { - CWARN("pkey2pkey_index failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_nid), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - path->mtu = IBNAL_IB_MTU; - - } else if ((arp->mask & IBAT_LID_VALID) != 0) { - CWARN("Creating new path record for %s @ %u.%u.%u.%u\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip)); - - cv->cv_pkey_index = IBNAL_PKEY_IDX; - cv->cv_sgid_index = IBNAL_SGID_IDX; - cv->cv_port = arp->local_port_num; - - memset(path, 0, sizeof(*path)); - - vvrc = port_num2base_gid(kibnal_data.kib_hca, cv->cv_port, - &path->sgid); - if (vvrc != vv_return_ok) { - CWARN("port_num2base_gid failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_ip), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - vvrc = port_num2base_lid(kibnal_data.kib_hca, cv->cv_port, - &path->slid); - if (vvrc != vv_return_ok) { - CWARN("port_num2base_lid failed for %s @ %u.%u.%u.%u: %d\n", - libcfs_nid2str(peer->ibp_ip), - HIPQUAD(peer->ibp_ip), vvrc); - goto failed; - } - - path->dgid = arp->gid; - path->sl = IBNAL_SERVICE_LEVEL; - path->dlid = arp->lid; - path->mtu = IBNAL_IB_MTU; - path->rate = IBNAL_STATIC_RATE; - path->pkt_life_time = IBNAL_PKT_LIFETIME; - path->pkey = IBNAL_PKEY; - path->traffic_class = IBNAL_TRAFFIC_CLASS; - } else { - CWARN("Arp for %s @ %u.%u.%u.%u returned neither PATH nor LID\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip)); - goto failed; - } - - rc = kibnal_set_qp_state(conn, vv_qp_state_init); - if (rc != 0) { - kibnal_connreq_done(conn, 1, rc); - } - - /* do the actual connection request */ - kibnal_connect_conn(conn); - return; 
- - failed: - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - peer->ibp_arp_count--; - if (peer->ibp_arp_count == 0) { - /* final ARP attempt failed */ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed (final attempt)\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip)); - } else { - /* Retry ARP: ibp_connecting++ so terminating conn - * doesn't end peer's connection attempt */ - peer->ibp_connecting++; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - CDEBUG(D_NETERROR, "Arp %s @ %u.%u.%u.%u failed (%d attempts left)\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), - peer->ibp_arp_count); - - kibnal_schedule_peer_arp(peer); - } - kibnal_connreq_done(conn, 1, -ENETUNREACH); -} - -void -kibnal_arp_callback (ibat_stat_t arprc, ibat_arp_data_t *arp_data, void *arg) -{ - /* CAVEAT EMPTOR: tasklet context */ - kib_peer_t *peer; - kib_conn_t *conn = (kib_conn_t *)arg; - - LASSERT (conn != NULL); - LASSERT (conn->ibc_state == IBNAL_CONN_ACTIVE_ARP); - - peer = conn->ibc_peer; - - if (arprc != ibat_stat_ok) - CDEBUG(D_NETERROR, "Arp %s at %u.%u.%u.%u failed: %d\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), arprc); - else - CDEBUG(D_NET, "Arp %s at %u.%u.%u.%u OK: LID %s PATH %s\n", - libcfs_nid2str(peer->ibp_nid), HIPQUAD(peer->ibp_ip), - (arp_data->mask & IBAT_LID_VALID) == 0 ? "invalid" : "valid", - (arp_data->mask & IBAT_PRI_PATH_VALID) == 0 ? "invalid" : "valid"); - - conn->ibc_connvars->cv_arprc = arprc; - if (arprc == ibat_stat_ok) - conn->ibc_connvars->cv_arp = *arp_data; - - kibnal_schedule_conn(conn); - kibnal_conn_decref(conn); -} - -void -kibnal_arp_peer (kib_peer_t *peer) -{ - cm_cep_handle_t cep; - kib_conn_t *conn; - int ibatrc; - - /* Only the connd does this (i.e. 
single threaded) */ - LASSERT (current == kibnal_data.kib_connd); - LASSERT (peer->ibp_connecting != 0); - LASSERT (peer->ibp_arp_count > 0); - - cep = cm_create_cep(cm_cep_transp_rc); - if (cep == NULL) { - CERROR ("Can't create cep for conn->%s\n", - libcfs_nid2str(peer->ibp_nid)); - kibnal_peer_connect_failed(peer, 1, -ENOMEM); - return; - } - - conn = kibnal_create_conn(cep); - if (conn == NULL) { - CERROR ("Can't allocate conn->%s\n", - libcfs_nid2str(peer->ibp_nid)); - cm_destroy_cep(cep); - kibnal_peer_connect_failed(peer, 1, -ENOMEM); - return; - } - - conn->ibc_peer = peer; - kibnal_peer_addref(peer); - - kibnal_set_conn_state(conn, IBNAL_CONN_ACTIVE_ARP); - - ibatrc = ibat_get_ib_data(htonl(peer->ibp_ip), INADDR_ANY, - ibat_paths_primary, - &conn->ibc_connvars->cv_arp, - kibnal_arp_callback, conn, 0); - CDEBUG(D_NET,"ibatrc %d\n", ibatrc); - switch (ibatrc) { - default: - LBUG(); - - case ibat_stat_pending: - /* NB callback has my ref on conn */ - break; - - case ibat_stat_ok: - case ibat_stat_error: - case ibat_stat_timeout: - case ibat_stat_not_found: - /* Immediate return (ARP cache hit or failure) == no callback. - * Do the next stage directly... 
*/ - conn->ibc_connvars->cv_arprc = ibatrc; - kibnal_arp_done(conn); - kibnal_conn_decref(conn); - break; - } -} - -int -kibnal_check_txs (kib_conn_t *conn, struct list_head *txs) -{ - kib_tx_t *tx; - struct list_head *ttmp; - int timed_out = 0; - - spin_lock(&conn->ibc_lock); - - list_for_each (ttmp, txs) { - tx = list_entry (ttmp, kib_tx_t, tx_list); - - if (txs == &conn->ibc_active_txs) { - LASSERT (!tx->tx_queued); - LASSERT (tx->tx_waiting || tx->tx_sending != 0); - } else { - LASSERT (tx->tx_queued); - } - - if (time_after_eq (jiffies, tx->tx_deadline)) { - timed_out = 1; - break; - } - } - - spin_unlock(&conn->ibc_lock); - return timed_out; -} - -int -kibnal_conn_timed_out (kib_conn_t *conn) -{ - return kibnal_check_txs(conn, &conn->ibc_tx_queue) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) || - kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) || - kibnal_check_txs(conn, &conn->ibc_active_txs); -} - -void -kibnal_check_conns (int idx) -{ - struct list_head *peers = &kibnal_data.kib_peers[idx]; - struct list_head *ptmp; - kib_peer_t *peer; - kib_conn_t *conn; - struct list_head *ctmp; - unsigned long flags; - - again: - /* NB. We expect to have a look at all the peers and not find any - * rdmas to time out, so we just use a shared lock while we - * take a look... */ - read_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - list_for_each (ptmp, peers) { - peer = list_entry (ptmp, kib_peer_t, ibp_list); - - list_for_each (ctmp, &peer->ibp_conns) { - conn = list_entry (ctmp, kib_conn_t, ibc_list); - - LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED); - - /* In case we have enough credits to return via a - * NOOP, but there were no non-blocking tx descs - * free to do it last time... */ - kibnal_check_sends(conn); - - if (!kibnal_conn_timed_out(conn)) - continue; - - /* Handle timeout by closing the whole connection. We - * can only be sure RDMA activity has ceased once the - * QP has been modified. 
*/ - - kibnal_conn_addref(conn); /* 1 ref for me... */ - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - - CERROR("Timed out RDMA with %s\n", - libcfs_nid2str(peer->ibp_nid)); - - kibnal_close_conn (conn, -ETIMEDOUT); - kibnal_conn_decref(conn); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); -} - -void -kibnal_disconnect_conn (kib_conn_t *conn) -{ - static cm_drequest_data_t dreq; /* just for the space */ - - cm_return_t cmrc; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (current == kibnal_data.kib_connd); - - write_lock_irqsave(&kibnal_data.kib_global_lock, flags); - - if (conn->ibc_disconnect) { - /* Had the CM callback already */ - write_unlock_irqrestore(&kibnal_data.kib_global_lock, - flags); - kibnal_conn_disconnected(conn); - return; - } - - LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT1); - - /* active disconnect */ - cmrc = cm_disconnect(conn->ibc_cep, &dreq, NULL); - if (cmrc == cm_stat_success) { - /* waiting for CM */ - conn->ibc_state = IBNAL_CONN_DISCONNECT2; - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - return; - } - - write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags); - - cm_cancel(conn->ibc_cep); - cfs_pause(cfs_time_seconds(1)/10); - - if (!conn->ibc_disconnect) /* CM callback will never happen now */ - kibnal_conn_decref(conn); - - LASSERT (atomic_read(&conn->ibc_refcount) > 0); - LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECT1); - - kibnal_conn_disconnected(conn); -} - -int -kibnal_connd (void *arg) -{ - wait_queue_t wait; - unsigned long flags; - kib_pcreq_t *pcr; - kib_conn_t *conn; - kib_peer_t *peer; - int timeout; - int i; - int dropped_lock; - int peer_index = 0; - unsigned long deadline = jiffies; - - cfs_daemonize ("kibnal_connd"); - cfs_block_allsigs (); - - init_waitqueue_entry (&wait, current); - kibnal_data.kib_connd = current; - - 
spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - - while (!kibnal_data.kib_shutdown) { - - dropped_lock = 0; - - if (!list_empty (&kibnal_data.kib_connd_zombies)) { - conn = list_entry (kibnal_data.kib_connd_zombies.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - kibnal_destroy_conn(conn); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_pcreqs)) { - pcr = list_entry(kibnal_data.kib_connd_pcreqs.next, - kib_pcreq_t, pcr_list); - list_del(&pcr->pcr_list); - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - kibnal_recv_connreq(pcr->pcr_cep, &pcr->pcr_cmreq); - LIBCFS_FREE(pcr, sizeof(*pcr)); - - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_peers)) { - peer = list_entry (kibnal_data.kib_connd_peers.next, - kib_peer_t, ibp_connd_list); - - list_del_init (&peer->ibp_connd_list); - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - kibnal_arp_peer (peer); - kibnal_peer_decref (peer); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - if (!list_empty (&kibnal_data.kib_connd_conns)) { - conn = list_entry (kibnal_data.kib_connd_conns.next, - kib_conn_t, ibc_list); - list_del (&conn->ibc_list); - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - switch (conn->ibc_state) { - default: - LBUG(); - - case IBNAL_CONN_ACTIVE_ARP: - kibnal_arp_done(conn); - break; - - case IBNAL_CONN_ACTIVE_CONNECT: - kibnal_check_connreply(conn); - break; - - case IBNAL_CONN_PASSIVE_WAIT: - kibnal_check_passive_wait(conn); - break; - - case IBNAL_CONN_DISCONNECT1: - case IBNAL_CONN_DISCONNECT2: - kibnal_disconnect_conn(conn); - break; - } - kibnal_conn_decref(conn); - - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - /* careful with the 
jiffy wrap... */ - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kibnal_data.kib_peer_hash_size; - - spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); - dropped_lock = 1; - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. */ - - if (*kibnal_tunables.kib_timeout > n * p) - chunk = (chunk * n * p) / - *kibnal_tunables.kib_timeout; - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kibnal_check_conns (peer_index); - peer_index = (peer_index + 1) % - kibnal_data.kib_peer_hash_size; - } - - deadline += p * HZ; - spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); - } - - if (dropped_lock) - continue; - - /* Nothing to do for 'timeout' */ - set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - schedule_timeout (timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait); - spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags); - } - - spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags); - - kibnal_thread_fini (); - return (0); -} - -void -kibnal_async_callback(vv_event_record_t ev) -{ - CERROR("type: %d, port: %d, data: "LPX64"\n", - ev.event_type, ev.port_num, ev.type.data); -} - -void -kibnal_cq_callback (unsigned long unused_context) -{ - unsigned long flags; - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - kibnal_data.kib_ready = 1; - wake_up(&kibnal_data.kib_sched_waitq); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); -} - -int -kibnal_scheduler(void *arg) -{ - long id = (long)arg; - wait_queue_t wait; - char name[16]; - vv_wc_t 
wc; - vv_return_t vvrc; - vv_return_t vvrc2; - unsigned long flags; - kib_rx_t *rx; - __u64 rxseq = 0; - int busy_loops = 0; - - snprintf(name, sizeof(name), "kibnal_sd_%02ld", id); - cfs_daemonize(name); - cfs_block_allsigs(); - - init_waitqueue_entry(&wait, current); - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - - while (!kibnal_data.kib_shutdown) { - if (busy_loops++ >= IBNAL_RESCHED) { - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - our_cond_resched(); - busy_loops = 0; - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - } - - if (kibnal_data.kib_ready && - !kibnal_data.kib_checking_cq) { - /* take ownership of completion polling */ - kibnal_data.kib_checking_cq = 1; - /* Assume I'll exhaust the CQ */ - kibnal_data.kib_ready = 0; - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - vvrc = vv_poll_for_completion(kibnal_data.kib_hca, - kibnal_data.kib_cq, &wc); - if (vvrc == vv_return_err_cq_empty) { - vvrc2 = vv_request_completion_notification( - kibnal_data.kib_hca, - kibnal_data.kib_cq, - vv_next_solicit_unsolicit_event); - LASSERT (vvrc2 == vv_return_ok); - } - - if (vvrc == vv_return_ok && - kibnal_wreqid2type(wc.wr_id) == IBNAL_WID_RX) { - rx = (kib_rx_t *)kibnal_wreqid2ptr(wc.wr_id); - - /* Grab the RX sequence number NOW before - * anyone else can get an RX completion */ - rxseq = rx->rx_conn->ibc_rxseq++; - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - /* give up ownership of completion polling */ - kibnal_data.kib_checking_cq = 0; - - if (vvrc == vv_return_err_cq_empty) - continue; - - LASSERT (vvrc == vv_return_ok); - /* Assume there's more: get another scheduler to check - * while I handle this completion... 
*/ - - kibnal_data.kib_ready = 1; - wake_up(&kibnal_data.kib_sched_waitq); - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - switch (kibnal_wreqid2type(wc.wr_id)) { - case IBNAL_WID_RX: - kibnal_rx_complete( - (kib_rx_t *)kibnal_wreqid2ptr(wc.wr_id), - wc.completion_status, - wc.num_bytes_transfered, - rxseq); - break; - - case IBNAL_WID_TX: - kibnal_tx_complete( - (kib_tx_t *)kibnal_wreqid2ptr(wc.wr_id), - wc.completion_status); - break; - - case IBNAL_WID_RDMA: - /* We only get RDMA completion notification if - * it fails. So we just ignore them completely - * because... - * - * 1) If an RDMA fails, all subsequent work - * items, including the final SEND will fail - * too, so I'm still guaranteed to notice that - * this connection is hosed. - * - * 2) It's positively dangerous to look inside - * the tx descriptor obtained from an RDMA work - * item. As soon as I drop the kib_sched_lock, - * I give a scheduler on another CPU a chance - * to get the final SEND completion, so the tx - * descriptor can get freed as I inspect it. */ - CDEBUG(D_NETERROR, "RDMA failed: %d\n", - wc.completion_status); - break; - - default: - LBUG(); - } - - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - continue; - } - - /* Nothing to do; sleep... 
*/ - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kibnal_data.kib_sched_waitq, &wait); - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, - flags); - - schedule(); - - remove_wait_queue(&kibnal_data.kib_sched_waitq, &wait); - set_current_state(TASK_RUNNING); - spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags); - } - - spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags); - - kibnal_thread_fini(); - return (0); -} diff --git a/lnet/klnds/viblnd/viblnd_modparams.c b/lnet/klnds/viblnd/viblnd_modparams.c deleted file mode 100644 index 1179d72960eed0fa57a677e868495aa4a67c15c4..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd_modparams.c +++ /dev/null @@ -1,237 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "viblnd.h" - -static int service_number = 0x11b9a2; -CFS_MODULE_PARM(service_number, "i", int, 0444, - "IB service number"); - -static int min_reconnect_interval = 1; -CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644, - "minimum connection retry interval (seconds)"); - -static int max_reconnect_interval = 60; -CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644, - "maximum connection retry interval (seconds)"); - -static int concurrent_peers = 1152; -CFS_MODULE_PARM(concurrent_peers, "i", int, 0444, - "maximum number of peers that may connect"); - -static int cksum = 0; -CFS_MODULE_PARM(cksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -static int timeout = 50; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of message descriptors"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "# concurrent sends"); - -static int peer_credits = 8; -CFS_MODULE_PARM(peer_credits, "i", int, 0444, - "# concurrent sends to 1 peer"); - -static int arp_retries = 3; -CFS_MODULE_PARM(arp_retries, "i", int, 0644, - "# of times to retry ARP"); - -static char *hca_basename = "InfiniHost"; -CFS_MODULE_PARM(hca_basename, "s", charp, 0444, - "HCA base name"); - -static char *ipif_basename = "ipoib"; -CFS_MODULE_PARM(ipif_basename, "s", charp, 0444, - "IPoIB interface base name"); - -static int local_ack_timeout = 0x12; -CFS_MODULE_PARM(local_ack_timeout, "i", int, 0644, - "ACK timeout for low-level 'sends'"); - -static int retry_cnt = 7; -CFS_MODULE_PARM(retry_cnt, "i", int, 0644, - "Retransmissions when no ACK received"); - -static int rnr_cnt = 6; -CFS_MODULE_PARM(rnr_cnt, "i", int, 0644, - "RNR retransmissions"); - -static int rnr_nak_timer = 0x10; -CFS_MODULE_PARM(rnr_nak_timer, "i", int, 0644, - "RNR retransmission interval"); - -static int keepalive = 100; -CFS_MODULE_PARM(keepalive, "i", int, 0644, - "Idle time in 
seconds before sending a keepalive"); - -static int concurrent_sends = IBNAL_RX_MSGS; -CFS_MODULE_PARM(concurrent_sends, "i", int, 0644, - "send work-queue sizing"); - -#if IBNAL_USE_FMR -static int fmr_remaps = 1000; -CFS_MODULE_PARM(fmr_remaps, "i", int, 0444, - "FMR mappings allowed before unmap"); -#endif - -kib_tunables_t kibnal_tunables = { - .kib_service_number = &service_number, - .kib_min_reconnect_interval = &min_reconnect_interval, - .kib_max_reconnect_interval = &max_reconnect_interval, - .kib_concurrent_peers = &concurrent_peers, - .kib_cksum = &cksum, - .kib_timeout = &timeout, - .kib_ntx = &ntx, - .kib_credits = &credits, - .kib_peercredits = &peer_credits, - .kib_arp_retries = &arp_retries, - .kib_hca_basename = &hca_basename, - .kib_ipif_basename = &ipif_basename, - .kib_local_ack_timeout = &local_ack_timeout, - .kib_retry_cnt = &retry_cnt, - .kib_rnr_cnt = &rnr_cnt, - .kib_rnr_nak_timer = &rnr_nak_timer, - .kib_keepalive = &keepalive, - .kib_concurrent_sends = &concurrent_sends, -#if IBNAL_USE_FMR - .kib_fmr_remaps = &fmr_remaps, -#endif -}; - -#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM - -static char hca_basename_space[32]; -static char ipif_basename_space[32]; - -static ctl_table kibnal_ctl_table[] = { - {1, "service_number", &service_number, - sizeof(int), 0444, NULL, &proc_dointvec}, - {2, "min_reconnect_interval", &min_reconnect_interval, - sizeof(int), 0644, NULL, &proc_dointvec}, - {3, "max_reconnect_interval", &max_reconnect_interval, - sizeof(int), 0644, NULL, &proc_dointvec}, - {4, "concurrent_peers", &concurrent_peers, - sizeof(int), 0444, NULL, &proc_dointvec}, - {5, "cksum", &cksum, - sizeof(int), 0644, NULL, &proc_dointvec}, - {6, "timeout", &timeout, - sizeof(int), 0644, NULL, &proc_dointvec}, - {7, "ntx", &ntx, - sizeof(int), 0444, NULL, &proc_dointvec}, - {8, "credits", &credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {9, "peer_credits", &peer_credits, - sizeof(int), 0444, NULL, &proc_dointvec}, - {10, "arp_retries", 
&arp_retries, - sizeof(int), 0644, NULL, &proc_dointvec}, - {11, "hca_basename", hca_basename_space, - sizeof(hca_basename_space), 0444, NULL, &proc_dostring}, - {12, "ipif_basename", ipif_basename_space, - sizeof(ipif_basename_space), 0444, NULL, &proc_dostring}, - {13, "local_ack_timeout", &local_ack_timeout, - sizeof(int), 0644, NULL, &proc_dointvec}, - {14, "retry_cnt", &retry_cnt, - sizeof(int), 0644, NULL, &proc_dointvec}, - {15, "rnr_cnt", &rnr_cnt, - sizeof(int), 0644, NULL, &proc_dointvec}, - {16, "rnr_nak_timer", &rnr_nak_timer, - sizeof(int), 0644, NULL, &proc_dointvec}, - {17, "keepalive", &keepalive, - sizeof(int), 0644, NULL, &proc_dointvec}, - {18, "concurrent_sends", &concurrent_sends, - sizeof(int), 0644, NULL, &proc_dointvec}, -#if IBNAL_USE_FMR - {19, "fmr_remaps", &fmr_remaps, - sizeof(int), 0444, NULL, &proc_dointvec}, -#endif - {0} -}; - -static ctl_table kibnal_top_ctl_table[] = { - {203, "vibnal", NULL, 0, 0555, kibnal_ctl_table}, - {0} -}; - -void -kibnal_initstrtunable(char *space, char *str, int size) -{ - strncpy(space, str, size); - space[size-1] = 0; -} - -int -kibnal_tunables_init () -{ - kibnal_initstrtunable(hca_basename_space, hca_basename, - sizeof(hca_basename_space)); - kibnal_initstrtunable(ipif_basename_space, ipif_basename, - sizeof(ipif_basename_space)); - - kibnal_tunables.kib_sysctl = - register_sysctl_table(kibnal_top_ctl_table, 0); - - if (kibnal_tunables.kib_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - if (*kibnal_tunables.kib_concurrent_sends > IBNAL_RX_MSGS) - *kibnal_tunables.kib_concurrent_sends = IBNAL_RX_MSGS; - if (*kibnal_tunables.kib_concurrent_sends < IBNAL_MSG_QUEUE_SIZE) - *kibnal_tunables.kib_concurrent_sends = IBNAL_MSG_QUEUE_SIZE; - - return 0; -} - -void -kibnal_tunables_fini () -{ - if (kibnal_tunables.kib_sysctl != NULL) - unregister_sysctl_table(kibnal_tunables.kib_sysctl); -} - -#else - -int -kibnal_tunables_init () -{ - return 0; -} - -void -kibnal_tunables_fini () -{ -} - -#endif - 
- - - - - diff --git a/lnet/klnds/viblnd/viblnd_wire.h b/lnet/klnds/viblnd/viblnd_wire.h deleted file mode 100644 index 26242c185290702246089058bf8b728f3c4e27f8..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/viblnd_wire.h +++ /dev/null @@ -1,121 +0,0 @@ -/************************************************************************ - * IB Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). - */ - -typedef struct kib_connparams -{ - __u32 ibcp_queue_depth; - __u32 ibcp_max_msg_size; - __u32 ibcp_max_frags; -} WIRE_ATTR kib_connparams_t; - -typedef struct -{ - lnet_hdr_t ibim_hdr; /* portals header */ - char ibim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kib_immediate_msg_t; - -#ifndef IBNAL_USE_FMR -# error "IBNAL_USE_FMR must be defined 1 or 0 before including this file" -#endif - -#if IBNAL_USE_FMR -typedef struct -{ - __u64 rd_addr; /* IO VMA address */ - __u32 rd_nob; /* # of bytes */ - __u32 rd_key; /* remote key */ -} WIRE_ATTR kib_rdma_desc_t; -#else -/* YEUCH! the __u64 address is split into 2 __u32 fields to ensure proper - * packing. Otherwise we can't fit enough frags into an IBNAL message (<= - * smallest page size on any arch). 
*/ -typedef struct -{ - __u32 rf_nob; /* # of bytes */ - __u32 rf_addr_lo; /* lo 4 bytes of vaddr */ - __u32 rf_addr_hi; /* hi 4 bytes of vaddr */ -} WIRE_ATTR kib_rdma_frag_t; - -typedef struct -{ - __u32 rd_key; /* local/remote key */ - __u32 rd_nfrag; /* # fragments */ - kib_rdma_frag_t rd_frags[0]; /* buffer frags */ -} WIRE_ATTR kib_rdma_desc_t; -#endif - -typedef struct -{ - lnet_hdr_t ibprm_hdr; /* portals header */ - __u64 ibprm_cookie; /* opaque completion cookie */ -} WIRE_ATTR kib_putreq_msg_t; - -typedef struct -{ - __u64 ibpam_src_cookie; /* reflected completion cookie */ - __u64 ibpam_dst_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */ -} WIRE_ATTR kib_putack_msg_t; - -typedef struct -{ - lnet_hdr_t ibgm_hdr; /* portals header */ - __u64 ibgm_cookie; /* opaque completion cookie */ - kib_rdma_desc_t ibgm_rd; /* rdma descriptor */ -} WIRE_ATTR kib_get_msg_t; - -typedef struct -{ - __u64 ibcm_cookie; /* opaque completion cookie */ - __s32 ibcm_status; /* < 0 failure: >= 0 length */ -} WIRE_ATTR kib_completion_msg_t; - -typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ibm_magic; /* I'm an openibnal message */ - __u16 ibm_version; /* this is my version number */ - - __u8 ibm_type; /* msg type */ - __u8 ibm_credits; /* returned credits */ - __u32 ibm_nob; /* # bytes in whole message */ - __u32 ibm_cksum; /* checksum (0 == no checksum) */ - __u64 ibm_srcnid; /* sender's NID */ - __u64 ibm_srcstamp; /* sender's incarnation */ - __u64 ibm_dstnid; /* destination's NID */ - __u64 ibm_dststamp; /* destination's incarnation */ - __u64 ibm_seq; /* sequence number */ - - union { - kib_connparams_t connparams; - kib_immediate_msg_t immediate; - kib_putreq_msg_t putreq; - kib_putack_msg_t putack; - kib_get_msg_t get; - kib_completion_msg_t completion; - } WIRE_ATTR ibm_u; -} WIRE_ATTR kib_msg_t; - -#define IBNAL_MSG_MAGIC LNET_PROTO_VIB_MAGIC /* unique magic */ - -#define 
IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 0x10 /* previous version */ - -#define IBNAL_MSG_VERSION 0x11 /* current version */ - -#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */ -#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */ -#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */ -#define IBNAL_MSG_IMMEDIATE 0xd1 /* immediate */ -#define IBNAL_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */ -#define IBNAL_MSG_PUT_NAK 0xd3 /* completion (sink->src) */ -#define IBNAL_MSG_PUT_ACK 0xd4 /* putack (sink->src) */ -#define IBNAL_MSG_PUT_DONE 0xd5 /* completion (src->sink) */ -#define IBNAL_MSG_GET_REQ 0xd6 /* getreq (sink->src) */ -#define IBNAL_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */ - -/* connection rejection reasons */ -#define IBNAL_REJECT_CONN_RACE 0 /* You lost connection race */ -#define IBNAL_REJECT_NO_RESOURCES 1 /* Out of memory/conns etc */ -#define IBNAL_REJECT_FATAL 2 /* Anything else */ diff --git a/lnet/klnds/viblnd/wirecheck.c b/lnet/klnds/viblnd/wirecheck.c deleted file mode 100644 index 5a0e060a4ee6102f8cd6e5b07fb5a7e0e5f5ab6b..0000000000000000000000000000000000000000 --- a/lnet/klnds/viblnd/wirecheck.c +++ /dev/null @@ -1,227 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include <stdio.h> -#include <string.h> -#include <sys/types.h> -#include <sys/wait.h> - -#include <lnet/api-support.h> - -/* This ghastly hack to allows me to include lib-types.h It doesn't affect any - * assertions generated here (but fails-safe if it ever does) */ -typedef struct { - int counter; -} atomic_t; - -#include <lnet/lib-types.h> - -#define IBNAL_USE_FMR 1 -#include "viblnd_wire.h" - -#ifndef HAVE_STRNLEN -#define strnlen(s, i) strlen(s) -#endif - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#undef STRINGIFY -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" 
CLASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" CLASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE((int)offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -system_string (char *cmdline, char *str, int len) -{ - int fds[2]; - int rc; - pid_t pid; - - rc = pipe (fds); - if (rc != 0) - abort (); - - pid = fork (); - if (pid == 0) { - /* child */ - int fd = fileno(stdout); - - rc = dup2(fds[1], fd); - if (rc != fd) - abort(); - - exit(system(cmdline)); - /* notreached */ - } else if ((int)pid < 0) { - abort(); - } else { - FILE *f = fdopen (fds[0], "r"); - - if (f == NULL) - abort(); - - close(fds[1]); - - if (fgets(str, len, f) == NULL) - abort(); - - if (waitpid(pid, &rc, 0) != pid) - abort(); - - if (!WIFEXITED(rc) || - WEXITSTATUS(rc) != 0) - abort(); - - if (strnlen(str, len) == len) - str[len - 1] = 0; - - if (str[strlen(str) - 1] == '\n') - str[strlen(str) - 1] = 0; - - fclose(f); - } -} - -int -main (int argc, char **argv) -{ - char unameinfo[80]; - char gccinfo[80]; - - system_string("uname -a", unameinfo, sizeof(unameinfo)); - system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); - - printf ("void vibnal_assert_wire_constants (void)\n" - "{\n" - " /* Wire protocol assertions generated by 'wirecheck'\n" - " * running on %s\n" - " * with %s */\n" - "\n", unameinfo, gccinfo); - - BLANK_LINE (); - - COMMENT ("Constants..."); - CHECK_DEFINE (IBNAL_MSG_MAGIC); - CHECK_DEFINE (IBNAL_MSG_VERSION); - - CHECK_DEFINE (IBNAL_MSG_CONNREQ); - CHECK_DEFINE (IBNAL_MSG_CONNACK); - CHECK_DEFINE (IBNAL_MSG_NOOP); - 
CHECK_DEFINE (IBNAL_MSG_IMMEDIATE); - CHECK_DEFINE (IBNAL_MSG_PUT_REQ); - CHECK_DEFINE (IBNAL_MSG_PUT_NAK); - CHECK_DEFINE (IBNAL_MSG_PUT_ACK); - CHECK_DEFINE (IBNAL_MSG_PUT_DONE); - CHECK_DEFINE (IBNAL_MSG_GET_REQ); - CHECK_DEFINE (IBNAL_MSG_GET_DONE); - - CHECK_DEFINE (IBNAL_REJECT_CONN_RACE); - CHECK_DEFINE (IBNAL_REJECT_NO_RESOURCES); - CHECK_DEFINE (IBNAL_REJECT_FATAL); - - CHECK_STRUCT (kib_connparams_t); - CHECK_MEMBER (kib_connparams_t, ibcp_queue_depth); - CHECK_MEMBER (kib_connparams_t, ibcp_max_msg_size); - CHECK_MEMBER (kib_connparams_t, ibcp_max_frags); - - CHECK_STRUCT (kib_immediate_msg_t); - CHECK_MEMBER (kib_immediate_msg_t, ibim_hdr); - CHECK_MEMBER (kib_immediate_msg_t, ibim_payload[13]); - - CHECK_DEFINE (IBNAL_USE_FMR); -#if IBNAL_USE_FMR - CHECK_STRUCT (kib_rdma_desc_t); - CHECK_MEMBER (kib_rdma_desc_t, rd_addr); - CHECK_MEMBER (kib_rdma_desc_t, rd_nob); - CHECK_MEMBER (kib_rdma_desc_t, rd_key); -#else - CHECK_STRUCT (kib_rdma_frag_t); - CHECK_MEMBER (kib_rdma_frag_t, rf_nob); - CHECK_MEMBER (kib_rdma_frag_t, rf_addr_lo); - CHECK_MEMBER (kib_rdma_frag_t, rf_addr_hi); - - CHECK_STRUCT (kib_rdma_desc_t); - CHECK_MEMBER (kib_rdma_desc_t, rd_key); - CHECK_MEMBER (kib_rdma_desc_t, rd_nfrag); - CHECK_MEMBER (kib_rdma_desc_t, rd_frags[13]); -#endif - CHECK_STRUCT (kib_putreq_msg_t); - CHECK_MEMBER (kib_putreq_msg_t, ibprm_hdr); - CHECK_MEMBER (kib_putreq_msg_t, ibprm_cookie); - - CHECK_STRUCT (kib_putack_msg_t); - CHECK_MEMBER (kib_putack_msg_t, ibpam_src_cookie); - CHECK_MEMBER (kib_putack_msg_t, ibpam_dst_cookie); - CHECK_MEMBER (kib_putack_msg_t, ibpam_rd); - - CHECK_STRUCT (kib_get_msg_t); - CHECK_MEMBER (kib_get_msg_t, ibgm_hdr); - CHECK_MEMBER (kib_get_msg_t, ibgm_cookie); - CHECK_MEMBER (kib_get_msg_t, ibgm_rd); - - CHECK_STRUCT (kib_completion_msg_t); - CHECK_MEMBER (kib_completion_msg_t, ibcm_cookie); - CHECK_MEMBER (kib_completion_msg_t, ibcm_status); - - CHECK_STRUCT (kib_msg_t); - CHECK_MEMBER (kib_msg_t, ibm_magic); - CHECK_MEMBER 
(kib_msg_t, ibm_version); - CHECK_MEMBER (kib_msg_t, ibm_type); - CHECK_MEMBER (kib_msg_t, ibm_credits); - CHECK_MEMBER (kib_msg_t, ibm_nob); - CHECK_MEMBER (kib_msg_t, ibm_cksum); - CHECK_MEMBER (kib_msg_t, ibm_srcnid); - CHECK_MEMBER (kib_msg_t, ibm_srcstamp); - CHECK_MEMBER (kib_msg_t, ibm_dstnid); - CHECK_MEMBER (kib_msg_t, ibm_dststamp); - CHECK_MEMBER (kib_msg_t, ibm_seq); - CHECK_MEMBER (kib_msg_t, ibm_u.connparams); - CHECK_MEMBER (kib_msg_t, ibm_u.immediate); - CHECK_MEMBER (kib_msg_t, ibm_u.putreq); - CHECK_MEMBER (kib_msg_t, ibm_u.putack); - CHECK_MEMBER (kib_msg_t, ibm_u.get); - CHECK_MEMBER (kib_msg_t, ibm_u.completion); - - printf ("}\n\n"); - - return (0); -} diff --git a/lnet/libcfs/.cvsignore b/lnet/libcfs/.cvsignore deleted file mode 100644 index c6f0aa426764bcd9e528d8671ea178383befe151..0000000000000000000000000000000000000000 --- a/lnet/libcfs/.cvsignore +++ /dev/null @@ -1,11 +0,0 @@ -.deps -Makefile -link-stamp -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/libcfs/Info.plist b/lnet/libcfs/Info.plist deleted file mode 100644 index aaf9b2f1aa2bc5a86befc758b17ec1e0f4133ba3..0000000000000000000000000000000000000000 --- a/lnet/libcfs/Info.plist +++ /dev/null @@ -1,35 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>English</string> - <key>CFBundleExecutable</key> - <string>libcfs</string> - <key>CFBundleIconFile</key> - <string></string> - <key>CFBundleIdentifier</key> - <string>com.clusterfs.lustre.libcfs</string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - <key>CFBundlePackageType</key> - <string>KEXT</string> - <key>CFBundleSignature</key> - <string>????</string> - <key>CFBundleVersion</key> - <string>1.0.1</string> - <key>OSBundleCompatibleVersion</key> - 
<string>1.0.0</string> - <key>OSBundleLibraries</key> - <dict> - <key>com.apple.kpi.bsd</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.libkern</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.mach</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.unsupported</key> - <string>8.0.0b1</string> - </dict> -</dict> -</plist> diff --git a/lnet/libcfs/Makefile.in b/lnet/libcfs/Makefile.in deleted file mode 100644 index 0940a567a9c738be60ace0caee07296cfb832caf..0000000000000000000000000000000000000000 --- a/lnet/libcfs/Makefile.in +++ /dev/null @@ -1,33 +0,0 @@ -MODULES = libcfs - -libcfs-linux-objs := linux-tracefile.o linux-debug.o -libcfs-linux-objs += linux-prim.o linux-mem.o -libcfs-linux-objs += linux-fs.o linux-sync.o linux-tcpip.o -libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o -libcfs-linux-objs += linux-utils.o linux-module.o - -ifeq ($(PATCHLEVEL),6) -libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) -endif - -default: all - -ifeq (@linux25@,no) -sources: - @for i in $(libcfs-linux-objs:%.o=%.c) ; do \ - echo "ln -s @srcdir@/linux/$$i ." ; \ - ln -sf @srcdir@/linux/$$i . || exit 1 ; \ - done - -else -sources: - -endif - -libcfs-all-objs := debug.o nidstrings.o lwt.o module.o tracefile.o watchdog.o - -libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) - -EXTRA_PRE_CFLAGS := -I@LUSTRE@/../lnet/libcfs - -@INCLUDE_RULES@ diff --git a/lnet/libcfs/autoMakefile.am b/lnet/libcfs/autoMakefile.am deleted file mode 100644 index 18381c10b52bd8b24eca588b747a2f44a1a43562..0000000000000000000000000000000000000000 --- a/lnet/libcfs/autoMakefile.am +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (C) 2001, 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -SUBDIRS := linux -if DARWIN -SUBDIRS += darwin -endif -DIST_SUBDIRS := $(SUBDIRS) - -if LIBLUSTRE -noinst_LIBRARIES= libcfs.a -libcfs_a_SOURCES= debug.c user-prim.c user-lock.c -libcfs_a_CPPFLAGS = $(LLCPPFLAGS) -libcfs_a_CFLAGS = $(LLCFLAGS) -endif - -if MODULES - -if LINUX -modulenet_DATA := libcfs$(KMODEXT) -endif - -if DARWIN -macos_PROGRAMS := libcfs - -nodist_libcfs_SOURCES := darwin/darwin-sync.c darwin/darwin-mem.c \ - darwin/darwin-prim.c darwin/darwin-fs.c darwin/darwin-curproc.c \ - darwin/darwin-tcpip.c darwin/darwin-utils.c \ - darwin/darwin-debug.c darwin/darwin-proc.c \ - darwin/darwin-tracefile.c darwin/darwin-module.c \ - debug.c module.c tracefile.c nidstrings.c watchdog.c - -libcfs_CFLAGS := $(EXTRA_KCFLAGS) -libcfs_LDFLAGS := $(EXTRA_KLDFLAGS) -libcfs_LDADD := $(EXTRA_KLIBS) - -plist_DATA := Info.plist - -install_data_hook := fix-kext-ownership - -endif - -endif - -install-data-hook: $(install_data_hook) - -EXTRA_DIST := Info.plist - -MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs -DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h user-prim.c user-lock.c diff --git a/lnet/libcfs/darwin/.cvsignore b/lnet/libcfs/darwin/.cvsignore deleted file mode 100644 index 282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/libcfs/darwin/Makefile.am b/lnet/libcfs/darwin/Makefile.am deleted file mode 100644 index 3f2077b753119df6003403e95bb7f6217bb139b1..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -EXTRA_DIST := \ - darwin-mem.c \ - darwin-proc.c \ - darwin-utils.c \ - darwin-debug.c \ - darwin-module.c \ - darwin-sync.c \ - darwin-fs.c \ - darwin-prim.c \ - darwin-tracefile.c \ - darwin-curproc.c \ - darwin-tcpip.c diff --git a/lnet/libcfs/darwin/darwin-curproc.c 
b/lnet/libcfs/darwin/darwin-curproc.c deleted file mode 100644 index e12394e4a6f9fc144c8bd5956b23e185397f7e8a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-curproc.c +++ /dev/null @@ -1,164 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre curproc API implementation for XNU kernel - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - * Public License for more details. You should have received a copy of the GNU - * General Public License along with Lustre; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * Implementation of cfs_curproc API (see lnet/include/libcfs/curproc.h) - * for XNU kernel. 
- */ - -static inline struct ucred *curproc_ucred(void) -{ -#ifdef __DARWIN8__ - return proc_ucred(current_proc()); -#else - return current_proc()->p_cred->pc_ucred; -#endif -} - -uid_t cfs_curproc_uid(void) -{ - return curproc_ucred()->cr_uid; -} - -gid_t cfs_curproc_gid(void) -{ - LASSERT(curproc_ucred()->cr_ngroups > 0); - return curproc_ucred()->cr_groups[0]; -} - -uid_t cfs_curproc_fsuid(void) -{ -#ifdef __DARWIN8__ - return curproc_ucred()->cr_ruid; -#else - return current_proc()->p_cred->p_ruid; -#endif -} - -gid_t cfs_curproc_fsgid(void) -{ -#ifdef __DARWIN8__ - return curproc_ucred()->cr_rgid; -#else - return current_proc()->p_cred->p_rgid; -#endif -} - -pid_t cfs_curproc_pid(void) -{ -#ifdef __DARWIN8__ - /* no pid for each thread, return address of thread struct */ - return (pid_t)current_thread(); -#else - return current_proc()->p_pid; -#endif -} - -int cfs_curproc_groups_nr(void) -{ - LASSERT(curproc_ucred()->cr_ngroups > 0); - return curproc_ucred()->cr_ngroups - 1; -} - -int cfs_curproc_is_in_groups(gid_t gid) -{ - int i; - struct ucred *cr; - - cr = curproc_ucred(); - LASSERT(cr != NULL); - - for (i = 0; i < cr->cr_ngroups; ++ i) { - if (cr->cr_groups[i] == gid) - return 1; - } - return 0; -} - -void cfs_curproc_groups_dump(gid_t *array, int size) -{ - struct ucred *cr; - - cr = curproc_ucred(); - LASSERT(cr != NULL); - CLASSERT(sizeof array[0] == sizeof (__u32)); - - size = min_t(int, size, cr->cr_ngroups); - memcpy(array, &cr->cr_groups[1], size * sizeof(gid_t)); -} - -mode_t cfs_curproc_umask(void) -{ -#ifdef __DARWIN8__ - /* - * XXX Liang: - * - * fd_cmask is not available in kexts, so we just assume - * verything is permited. 
- */ - return -1; -#else - return current_proc()->p_fd->fd_cmask; -#endif -} - -char *cfs_curproc_comm(void) -{ -#ifdef __DARWIN8__ - /* - * Writing to proc->p_comm is not permited in Darwin8, - * because proc_selfname() only return a copy of proc->p_comm, - * so this function is not really working while user try to - * change comm of current process. - */ - static char pcomm[MAXCOMLEN+1]; - - proc_selfname(pcomm, MAXCOMLEN+1); - return pcomm; -#else - return current_proc()->p_comm; -#endif -} - -cfs_kernel_cap_t cfs_curproc_cap_get(void) -{ - return -1; -} - -void cfs_curproc_cap_set(cfs_kernel_cap_t cap) -{ - return; -} - - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/darwin/darwin-debug.c b/lnet/libcfs/darwin/darwin-debug.c deleted file mode 100644 index 2152d4052916d37c5bdf7c02bcf526f5c75fcd8a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-debug.c +++ /dev/null @@ -1,77 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include "tracefile.h" - -void libcfs_debug_dumpstack(cfs_task_t *tsk) -{ - return; -} - -void libcfs_run_lbug_upcall(char *file, const char *fn, const int line) -{ -} - -void lbug_with_loc(char *file, const char *func, const int line) -{ - libcfs_catastrophe = 1; - CEMERG("LBUG: pid: %u thread: %#x\n", - (unsigned)cfs_curproc_pid(), (unsigned)current_thread()); - libcfs_debug_dumplog(); - libcfs_run_lbug_upcall(file, func, line); - while (1) - cfs_schedule(); - - /* panic("lbug_with_loc(%s, %s, %d)", file, func, line) */ -} - -#if ENTRY_NESTING_SUPPORT - -static inline struct cfs_debug_data *__current_cdd(void) -{ - struct cfs_debug_data *cdd; - - cdd = (struct cfs_debug_data *)current_uthread()->uu_nlminfo; - if (cdd != NULL && - cdd->magic1 == CDD_MAGIC1 && cdd->magic2 == CDD_MAGIC2 && - cdd->nesting_level < 1000) - return cdd; - else - return 
NULL; -} - -static inline void __current_cdd_set(struct cfs_debug_data *cdd) -{ - current_uthread()->uu_nlminfo = (void *)cdd; -} - -void __entry_nesting(struct cfs_debug_data *child) -{ - struct cfs_debug_data *parent; - - parent = __current_cdd(); - if (parent != NULL) { - child->parent = parent; - child->nesting_level = parent->nesting_level + 1; - } - __current_cdd_set(child); -} - -void __exit_nesting(struct cfs_debug_data *child) -{ - __current_cdd_set(child->parent); -} - -unsigned int __current_nesting_level(void) -{ - struct cfs_debug_data *cdd; - - cdd = __current_cdd(); - if (cdd != NULL) - return cdd->nesting_level; - else - return 0; -} -/* ENTRY_NESTING_SUPPORT */ -#endif diff --git a/lnet/libcfs/darwin/darwin-fs.c b/lnet/libcfs/darwin/darwin-fs.c deleted file mode 100644 index 45f37df11fc57c027b2cf1e3fc6f8ebec3bf8df2..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-fs.c +++ /dev/null @@ -1,466 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ -#include <mach/mach_types.h> -#include <string.h> -#include <sys/file.h> -#include <sys/malloc.h> -#include <sys/conf.h> -#include <sys/mount.h> -#include <sys/uio.h> -#include <sys/filedesc.h> -#include <sys/namei.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * Kernel APIs for file system in xnu - * - * Public functions - */ - -#ifdef __DARWIN8__ -#include <sys/vnode.h> - -extern int vn_rdwr(enum uio_rw, vnode_t, caddr_t, int, off_t, enum uio_seg, int, kauth_cred_t, int *, proc_t); - -/* vnode_size() is not exported */ -static errno_t -vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx) -{ - struct vnode_attr va; - int error; - - VATTR_INIT(&va); - VATTR_WANTED(&va, va_data_size); - error = vnode_getattr(vp, &va, ctx); - if (!error) - *sizep = va.va_data_size; - return(error); -} - -/* - * XXX Liang: - * - * kern_file_*() are not safe for multi-threads now, - * however, we need them only for tracefiled, so it's - * not so important to implement for MT. 
- */ -int -kern_file_size(struct cfs_kern_file *fp, off_t *psize) -{ - int error; - off_t size; - - error = vnode_size(fp->f_vp, &size, fp->f_ctxt); - if (error) - return error; - - if (psize) - *psize = size; - return 0; -} - -struct cfs_kern_file * -kern_file_open(const char * filename, int uflags, int mode, int *err) -{ - struct cfs_kern_file *fp; - vnode_t vp; - int error; - - fp = (struct cfs_kern_file *)_MALLOC(sizeof(struct cfs_kern_file), M_TEMP, M_WAITOK); - if (fp == NULL) { - if (err != NULL) - *err = -ENOMEM; - return NULL; - } - fp->f_flags = FFLAGS(uflags); - fp->f_ctxt = vfs_context_create(NULL); - - if ((error = vnode_open(filename, fp->f_flags, - mode, 0, &vp, fp->f_ctxt))){ - if (err != NULL) - *err = -error; - _FREE(fp, M_TEMP); - } else { - if (err != NULL) - *err = 0; - fp->f_vp = vp; - } - - return fp; -} - -int -kern_file_close(struct cfs_kern_file *fp) -{ - vnode_close(fp->f_vp, fp->f_flags, fp->f_ctxt); - vfs_context_rele(fp->f_ctxt); - _FREE(fp, M_TEMP); - - return 0; -} - -int -kern_file_read(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos) -{ - struct proc *p = current_proc(); - int resid; - int error; - - assert(buf != NULL); - assert(fp != NULL && fp->f_vp != NULL); - - error = vn_rdwr(UIO_READ, fp->f_vp, buf, nbytes, *pos, - UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p); - if ((error) || (nbytes == resid)) { - if (!error) - error = -EINVAL; - return error; - } - *pos += nbytes - resid; - - return (int)(nbytes - resid); -} - -int -kern_file_write(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos) -{ - struct proc *p = current_proc(); - int resid; - int error; - - assert(buf != NULL); - assert(fp != NULL && fp->f_vp != NULL); - - error = vn_rdwr(UIO_WRITE, fp->f_vp, buf, nbytes, *pos, - UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p); - if ((error) || (nbytes == resid)) { - if (!error) - error = -EINVAL; - return error; - } - *pos += nbytes - resid; - - return (int)(nbytes - 
resid); - -} - -int -kern_file_sync (struct cfs_kern_file *fp) -{ - return VNOP_FSYNC(fp->f_vp, MNT_WAIT, fp->f_ctxt); -} - -#else /* !__DARWIN8__ */ - -int -kern_file_size(struct file *fp, off_t *size) -{ - struct vnode *vp = (struct vnode *)fp->f_data; - struct stat sb; - int rc; - - rc = vn_stat(vp, &sb, current_proc()); - if (rc) { - *size = 0; - return rc; - } - *size = sb.st_size; - return 0; -} - -cfs_file_t * -kern_file_open(const char * filename, int flags, int mode, int *err) -{ - struct nameidata nd; - cfs_file_t *fp; - register struct vnode *vp; - int rc; - extern struct fileops vnops; - extern int nfiles; - CFS_DECL_CONE_DATA; - - CFS_CONE_IN; - nfiles++; - MALLOC_ZONE(fp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO); - bzero(fp, sizeof(cfs_file_t)); - fp->f_count = 1; - LIST_CIRCLE(fp, f_list); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)filename, current_proc()); - if ((rc = vn_open(&nd, flags, mode)) != 0){ - printf("filp_open failed at (%d)\n", rc); - if (err != NULL) - *err = rc; - FREE_ZONE(fp, sizeof *fp, M_FILE); - CFS_CONE_EX; - return NULL; - } - vp = nd.ni_vp; - fp->f_flag = flags & FMASK; - fp->f_type = DTYPE_VNODE; - fp->f_ops = &vnops; - fp->f_data = (caddr_t)vp; - fp->f_cred = current_proc()->p_ucred; - /* - * Hold cred to increase reference - */ - crhold(fp->f_cred); - /* - * vnode is locked inside vn_open for lookup, - * we should release the lock before return - */ - VOP_UNLOCK(vp, 0, current_proc()); - CFS_CONE_EX; - - return fp; -} - -static int -frele_internal(cfs_file_t *fp) -{ - if (fp->f_count == (short)0xffff) - panic("frele of lustre: stale"); - if (--fp->f_count < 0) - panic("frele of lustre: count < 0"); - return ((int)fp->f_count); -} - -int -kern_file_close (cfs_file_t *fp) -{ - struct vnode *vp; - CFS_DECL_CONE_DATA; - - if (fp == NULL) - return 0; - - CFS_CONE_IN; - if (frele_internal(fp) > 0) - goto out; - vp = (struct vnode *)fp->f_data; - (void )vn_close(vp, fp->f_flag, fp->f_cred, 
current_proc()); - /* - * ffree(fp); - * Dont use ffree to release fp!!!! - * ffree will call LIST_REMOVE(fp), - * but fp is not in any list, this will - * cause kernel panic - */ - struct ucred *cred; - cred = fp->f_cred; - if (cred != NOCRED) { - fp->f_cred = NOCRED; - crfree(cred); - } - extern int nfiles; - nfiles--; - memset(fp, 0xff, sizeof *fp); - fp->f_count = (short)0xffff; - FREE_ZONE(fp, sizeof *fp, M_FILE); -out: - CFS_CONE_EX; - return 0; -} - -extern void bwillwrite(void); - -/* - * Write buffer to filp inside kernel - */ -int -kern_file_write (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos) -{ - struct uio auio; - struct iovec aiov; - struct proc *p = current_proc(); - long cnt, error = 0; - int flags = 0; - CFS_DECL_CONE_DATA; - - aiov.iov_base = (void *)(uintptr_t)buf; - aiov.iov_len = nbyte; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - if (pos != NULL) { - auio.uio_offset = *pos; - /* - * Liang: If don't set FOF_OFFSET, vn_write() - * will use fp->f_offset as the the real offset. 
- * Same in vn_read() - */ - flags |= FOF_OFFSET; - } else - auio.uio_offset = (off_t)-1; - if (nbyte > INT_MAX) - return (EINVAL); - auio.uio_resid = nbyte; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = p; - - cnt = nbyte; - CFS_CONE_IN; - if (fp->f_type == DTYPE_VNODE) - bwillwrite(); /* empty stuff now */ - if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { - if (auio.uio_resid != cnt && (error == ERESTART ||\ - error == EINTR || error == EWOULDBLOCK)) - error = 0; - /* The socket layer handles SIGPIPE */ - if (error == EPIPE && fp->f_type != DTYPE_SOCKET) - psignal(p, SIGPIPE); - } - CFS_CONE_EX; - if (error != 0) - cnt = -error; - else - cnt -= auio.uio_resid; - if (pos != NULL) - *pos += cnt; - return cnt; -} - -/* - * Read from filp inside kernel - */ -int -kern_file_read (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos) -{ - struct uio auio; - struct iovec aiov; - struct proc *p = current_proc(); - long cnt, error = 0; - int flags = 0; - CFS_DECL_CONE_DATA; - - aiov.iov_base = (caddr_t)buf; - aiov.iov_len = nbyte; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - if (pos != NULL) { - auio.uio_offset = *pos; - flags |= FOF_OFFSET; - } else - auio.uio_offset = (off_t)-1; - if (nbyte > INT_MAX) - return (EINVAL); - auio.uio_resid = nbyte; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = p; - - cnt = nbyte; - CFS_CONE_IN; - if ((error = fo_read(fp, &auio, fp->f_cred, flags, p)) != 0) { - if (auio.uio_resid != cnt && (error == ERESTART || - error == EINTR || error == EWOULDBLOCK)) - error = 0; - } - CFS_CONE_EX; - if (error != 0) - cnt = -error; - else - cnt -= auio.uio_resid; - if (pos != NULL) - *pos += cnt; - - return cnt; -} - -int -kern_file_sync (cfs_file_t *fp) -{ - struct vnode *vp = (struct vnode *)fp->f_data; - struct proc *p = current_proc(); - int error = 0; - CFS_DECL_CONE_DATA; - - CFS_CONE_IN; - if (fref(fp) == -1) { - CFS_CONE_EX; - return (-EBADF); - } - vn_lock(vp, 
LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); - VOP_UNLOCK(vp, 0, p); - frele(fp); - CFS_CONE_EX; - - return error; -} - -#endif /* !__DARWIN8__ */ - -cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor) -{ - return makedev(major, minor); -} - -cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev) -{ - return major(rdev); -} - -cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev) -{ - return minor(rdev); -} - -struct posix_acl *posix_acl_alloc(int count, int flags) -{ - static struct posix_acl acl; - return &acl; -} - -/* - * XXX Liang: I've not converted all of them, - * more is needed? - */ -int cfs_oflags2univ(int flags) -{ - int f; - - f = flags & O_ACCMODE; - f |= (flags & O_CREAT) ? CFS_O_CREAT: 0; - f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0; - f |= (flags & O_EXCL) ? CFS_O_EXCL: 0; - f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0; - f |= (flags & O_APPEND) ? CFS_O_APPEND: 0; - f |= (flags & O_NOFOLLOW) ? CFS_O_NOFOLLOW: 0; - f |= (flags & O_SYNC)? CFS_O_SYNC: 0; - return f; -} - -/* - * XXX Liang: we don't need it in OSX. - * But it should be implemented anyway. 
- */ -int cfs_univ2oflags(int flags) -{ - return flags; -} diff --git a/lnet/libcfs/darwin/darwin-internal.h b/lnet/libcfs/darwin/darwin-internal.h deleted file mode 100644 index 6c83577cd4420efee050b36dee8925f78a760585..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-internal.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __LIBCFS_DARWIN_INTERNAL_H__ -#define __LIBCFS_DARWIN_INTERNAL_H__ - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/malloc.h> -#include <sys/systm.h> -#include <sys/sysctl.h> - -int cfs_sysctl_isvalid(void); -struct sysctl_oid *cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int (*handler) SYSCTL_HANDLER_ARGS); -struct sysctl_oid *cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int n, - const char *name, int *ptr, int val); -struct sysctl_oid * cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int *ptr, int val); -struct sysctl_oid * cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, char *ptr, int len); -struct sysctl_oid * cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, void *ptr, int size); - -#endif diff --git a/lnet/libcfs/darwin/darwin-mem.c b/lnet/libcfs/darwin/darwin-mem.c deleted file mode 100644 index 3079a56e95bbe298177691afc432865f9d0143c0..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-mem.c +++ /dev/null @@ -1,480 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Liang Zhen <liangzhen@clusterfs.com> - * Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Darwin porting library - * Make things easy to port - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <mach/mach_types.h> -#include <string.h> -#include <sys/malloc.h> - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "darwin-internal.h" - -#if CFS_INDIVIDUAL_ZONE -extern zone_t zinit( vm_size_t, vm_size_t, vm_size_t, const char *); -extern void * zalloc(zone_t zone); -extern void *zalloc_noblock(zone_t zone); -extern void zfree(zone_t zone, void *addr); - -struct cfs_zone_nob { - struct list_head *z_nob; /* Pointer to z_link */ - struct list_head z_link; /* Do NOT access it directly */ -}; - -static struct cfs_zone_nob cfs_zone_nob; -static spinlock_t cfs_zone_guard; - -cfs_mem_cache_t *mem_cache_find(const char *name, size_t objsize) -{ - cfs_mem_cache_t *walker = NULL; - - LASSERT(cfs_zone_nob.z_nob != NULL); - - spin_lock(&cfs_zone_guard); - list_for_each_entry(walker, cfs_zone_nob.z_nob, mc_link) { - if (!strcmp(walker->mc_name, name) && \ - walker->mc_size == objsize) - break; - } - spin_unlock(&cfs_zone_guard); - - return walker; -} - -/* - * our wrapper around kern/zalloc.c:zinit() - * - * Creates copy of name and calls zinit() to do real work. Needed because zone - * survives kext unloading, so that @name cannot be just static string - * embedded into kext image. 
- */ -cfs_mem_cache_t *mem_cache_create(vm_size_t objsize, const char *name) -{ - cfs_mem_cache_t *mc = NULL; - char *cname; - - MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO); - if (mc == NULL){ - CERROR("cfs_mem_cache created fail!\n"); - return NULL; - } - - cname = _MALLOC(strlen(name) + 1, M_TEMP, M_WAITOK); - LASSERT(cname != NULL); - mc->mc_cache = zinit(objsize, (KMEM_MAX_ZONE * objsize), 0, strcpy(cname, name)); - mc->mc_size = objsize; - CFS_INIT_LIST_HEAD(&mc->mc_link); - strncpy(mc->mc_name, name, 1 + strlen(name)); - return mc; -} - -void mem_cache_destroy(cfs_mem_cache_t *mc) -{ - /* - * zone can NOT be destroyed after creating, - * so just keep it in list. - * - * We will not lost a zone after we unload - * libcfs, it can be found by from libcfs.zone - */ - return; -} - -#define mem_cache_alloc(mc) zalloc((mc)->mc_cache) -#ifdef __DARWIN8__ -# define mem_cache_alloc_nb(mc) zalloc((mc)->mc_cache) -#else -/* XXX Liang: Tiger doesn't export zalloc_noblock() */ -# define mem_cache_alloc_nb(mc) zalloc_noblock((mc)->mc_cache) -#endif -#define mem_cache_free(mc, p) zfree((mc)->mc_cache, p) - -#else /* !CFS_INDIVIDUAL_ZONE */ - -cfs_mem_cache_t * -mem_cache_find(const char *name, size_t objsize) -{ - return NULL; -} - -cfs_mem_cache_t *mem_cache_create(vm_size_t size, const char *name) -{ - cfs_mem_cache_t *mc = NULL; - - MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO); - if (mc == NULL){ - CERROR("cfs_mem_cache created fail!\n"); - return NULL; - } - mc->mc_cache = OSMalloc_Tagalloc(name, OSMT_DEFAULT); - mc->mc_size = size; - return mc; -} - -void mem_cache_destroy(cfs_mem_cache_t *mc) -{ - OSMalloc_Tagfree(mc->mc_cache); - FREE(mc, M_TEMP); -} - -#define mem_cache_alloc(mc) OSMalloc((mc)->mc_size, (mc)->mc_cache) -#define mem_cache_alloc_nb(mc) OSMalloc_noblock((mc)->mc_size, (mc)->mc_cache) -#define mem_cache_free(mc, p) OSFree(p, (mc)->mc_size, (mc)->mc_cache) - -#endif /* 
!CFS_INDIVIDUAL_ZONE */ - -cfs_mem_cache_t * -cfs_mem_cache_create (const char *name, - size_t objsize, size_t off, unsigned long arg1) -{ - cfs_mem_cache_t *mc; - - mc = mem_cache_find(name, objsize); - if (mc) - return mc; - mc = mem_cache_create(objsize, name); - return mc; -} - -int cfs_mem_cache_destroy (cfs_mem_cache_t *cachep) -{ - mem_cache_destroy(cachep); - return 0; -} - -void *cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags) -{ - void *result; - - /* zalloc_canblock() is not exported... Emulate it. */ - if (flags & CFS_ALLOC_ATOMIC) { - result = (void *)mem_cache_alloc_nb(cachep); - } else { - LASSERT(get_preemption_level() == 0); - result = (void *)mem_cache_alloc(cachep); - } - if (result != NULL && (flags & CFS_ALLOC_ZERO)) - memset(result, 0, cachep->mc_size); - - return result; -} - -void cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp) -{ - mem_cache_free(cachep, objp); -} - -/* --------------------------------------------------------------------------- - * Page operations - * - * --------------------------------------------------------------------------- */ - -/* - * "Raw" pages - */ - -static unsigned int raw_pages = 0; -static cfs_mem_cache_t *raw_page_cache = NULL; - -static struct xnu_page_ops raw_page_ops; -static struct xnu_page_ops *page_ops[XNU_PAGE_NTYPES] = { - [XNU_PAGE_RAW] = &raw_page_ops -}; - -#if defined(LIBCFS_DEBUG) -static int page_type_is_valid(cfs_page_t *page) -{ - LASSERT(page != NULL); - return 0 <= page->type && page->type < XNU_PAGE_NTYPES; -} - -static int page_is_raw(cfs_page_t *page) -{ - return page->type == XNU_PAGE_RAW; -} -#endif - -static struct xnu_raw_page *as_raw(cfs_page_t *page) -{ - LASSERT(page_is_raw(page)); - return list_entry(page, struct xnu_raw_page, header); -} - -static void *raw_page_address(cfs_page_t *pg) -{ - return (void *)as_raw(pg)->virtual; -} - -static void *raw_page_map(cfs_page_t *pg) -{ - return (void *)as_raw(pg)->virtual; -} - -static void raw_page_unmap(cfs_page_t *pg) 
-{ -} - -static struct xnu_page_ops raw_page_ops = { - .page_map = raw_page_map, - .page_unmap = raw_page_unmap, - .page_address = raw_page_address -}; - -extern int get_preemption_level(void); - -struct list_head page_death_row; -spinlock_t page_death_row_phylax; - -static void raw_page_finish(struct xnu_raw_page *pg) -{ - -- raw_pages; - if (pg->virtual != NULL) - cfs_mem_cache_free(raw_page_cache, pg->virtual); - cfs_free(pg); -} - -void raw_page_death_row_clean(void) -{ - struct xnu_raw_page *pg; - - spin_lock(&page_death_row_phylax); - while (!list_empty(&page_death_row)) { - pg = container_of(page_death_row.next, - struct xnu_raw_page, link); - list_del(&pg->link); - spin_unlock(&page_death_row_phylax); - raw_page_finish(pg); - spin_lock(&page_death_row_phylax); - } - spin_unlock(&page_death_row_phylax); -} - -/* Free a "page" */ -void free_raw_page(struct xnu_raw_page *pg) -{ - if (!atomic_dec_and_test(&pg->count)) - return; - /* - * kmem_free()->vm_map_remove()->vm_map_delete()->lock_write() may - * block. (raw_page_done()->upl_abort() can block too) On the other - * hand, cfs_free_page() may be called in non-blockable context. To - * work around this, park pages on global list when cannot block. - */ - if (get_preemption_level() > 0) { - spin_lock(&page_death_row_phylax); - list_add(&pg->link, &page_death_row); - spin_unlock(&page_death_row_phylax); - } else { - raw_page_finish(pg); - raw_page_death_row_clean(); - } -} - -cfs_page_t *cfs_alloc_page(u_int32_t flags) -{ - struct xnu_raw_page *page; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - page = cfs_alloc(sizeof *page, flags); - if (page != NULL) { - page->virtual = cfs_mem_cache_alloc(raw_page_cache, flags); - if (page->virtual != NULL) { - ++ raw_pages; - page->header.type = XNU_PAGE_RAW; - atomic_set(&page->count, 1); - } else { - cfs_free(page); - page = NULL; - } - } - return page != NULL ? 
&page->header : NULL; -} - -void cfs_free_page(cfs_page_t *pages) -{ - free_raw_page(as_raw(pages)); -} - -void cfs_get_page(cfs_page_t *p) -{ - atomic_inc(&as_raw(p)->count); -} - -int cfs_put_page_testzero(cfs_page_t *p) -{ - return atomic_dec_and_test(&as_raw(p)->count); -} - -int cfs_page_count(cfs_page_t *p) -{ - return atomic_read(&as_raw(p)->count); -} - -/* - * Generic page operations - */ - -void *cfs_page_address(cfs_page_t *pg) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - LASSERT(page_type_is_valid(pg)); - return page_ops[pg->type]->page_address(pg); -} - -void *cfs_kmap(cfs_page_t *pg) -{ - LASSERT(page_type_is_valid(pg)); - return page_ops[pg->type]->page_map(pg); -} - -void cfs_kunmap(cfs_page_t *pg) -{ - LASSERT(page_type_is_valid(pg)); - return page_ops[pg->type]->page_unmap(pg); -} - -void xnu_page_ops_register(int type, struct xnu_page_ops *ops) -{ - LASSERT(0 <= type && type < XNU_PAGE_NTYPES); - LASSERT(ops != NULL); - LASSERT(page_ops[type] == NULL); - - page_ops[type] = ops; -} - -void xnu_page_ops_unregister(int type) -{ - LASSERT(0 <= type && type < XNU_PAGE_NTYPES); - LASSERT(page_ops[type] != NULL); - - page_ops[type] = NULL; -} - -/* - * Portable memory allocator API - */ -#ifdef HAVE_GET_PREEMPTION_LEVEL -extern int get_preemption_level(void); -#else -#define get_preemption_level() (0) -#endif - -void *cfs_alloc(size_t nr_bytes, u_int32_t flags) -{ - int mflags; - - mflags = 0; - if (flags & CFS_ALLOC_ATOMIC) { - mflags |= M_NOWAIT; - } else { - LASSERT(get_preemption_level() == 0); - mflags |= M_WAITOK; - } - - if (flags & CFS_ALLOC_ZERO) - mflags |= M_ZERO; - - return _MALLOC(nr_bytes, M_TEMP, mflags); -} - -void cfs_free(void *addr) -{ - return _FREE(addr, M_TEMP); -} - -void *cfs_alloc_large(size_t nr_bytes) -{ - LASSERT(get_preemption_level() == 0); - return _MALLOC(nr_bytes, M_TEMP, M_WAITOK); -} - -void cfs_free_large(void *addr) -{ - 
LASSERT(get_preemption_level() == 0); - return _FREE(addr, M_TEMP); -} - -/* - * Lookup cfs_zone_nob by sysctl.zone, if it cannot be - * found (first load of * libcfs since boot), allocate - * sysctl libcfs.zone. - */ -int cfs_mem_init(void) -{ -#if CFS_INDIVIDUAL_ZONE - int rc; - size_t len; - - len = sizeof(struct cfs_zone_nob); - rc = sysctlbyname("libcfs.zone", - (void *)&cfs_zone_nob, &len, NULL, 0); - if (rc == ENOENT) { - /* zone_nob is not register in libcfs_sysctl */ - struct cfs_zone_nob *nob; - struct sysctl_oid *oid; - - assert(cfs_sysctl_isvalid()); - - nob = _MALLOC(sizeof(struct cfs_zone_nob), - M_TEMP, M_WAITOK | M_ZERO); - CFS_INIT_LIST_HEAD(&nob->z_link); - nob->z_nob = &nob->z_link; - oid = cfs_alloc_sysctl_struct(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, - "zone", nob, sizeof(struct cfs_zone_nob)); - if (oid == NULL) { - _FREE(nob, M_TEMP); - return -ENOMEM; - } - sysctl_register_oid(oid); - - cfs_zone_nob.z_nob = nob->z_nob; - } - spin_lock_init(&cfs_zone_guard); -#endif - CFS_INIT_LIST_HEAD(&page_death_row); - spin_lock_init(&page_death_row_phylax); - raw_page_cache = cfs_mem_cache_create("raw-page", CFS_PAGE_SIZE, 0, 0); - return 0; -} - -void cfs_mem_fini(void) -{ - raw_page_death_row_clean(); - spin_lock_done(&page_death_row_phylax); - cfs_mem_cache_destroy(raw_page_cache); - -#if CFS_INDIVIDUAL_ZONE - cfs_zone_nob.z_nob = NULL; - spin_lock_done(&cfs_zone_guard); -#endif -} diff --git a/lnet/libcfs/darwin/darwin-module.c b/lnet/libcfs/darwin/darwin-module.c deleted file mode 100644 index 10cb7d842f53df0bafc5c144085a90b36dd6ad72..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-module.c +++ /dev/null @@ -1,191 +0,0 @@ -#include <mach/mach_types.h> -#include <string.h> -#include <sys/file.h> -#include <sys/conf.h> -#include <miscfs/devfs/devfs.h> - -#define DEBUG_SUBSYSTEM S_LNET -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -int libcfs_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct 
libcfs_ioctl_hdr *hdr; - struct libcfs_ioctl_data *data; - int err = 0; - ENTRY; - - hdr = (struct libcfs_ioctl_hdr *)buf; - data = (struct libcfs_ioctl_data *)buf; - /* libcfs_ioctl_data has been copied in by ioctl of osx */ - memcpy(buf, arg, sizeof(struct libcfs_ioctl_data)); - - if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { - CERROR("LIBCFS: version mismatch kernel vs application\n"); - RETURN(-EINVAL); - } - - if (hdr->ioc_len + buf >= end) { - CERROR("LIBCFS: user buffer exceeds kernel buffer\n"); - RETURN(-EINVAL); - } - - if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { - CERROR("LIBCFS: user buffer too small for ioctl\n"); - RETURN(-EINVAL); - } - buf += size_round(sizeof(*data)); - - if (data->ioc_inllen1) { - err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1)); - if (err) - RETURN(err); - data->ioc_inlbuf1 = buf; - buf += size_round(data->ioc_inllen1); - } - - if (data->ioc_inllen2) { - copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2)); - if (err) - RETURN(err); - data->ioc_inlbuf2 = buf; - } - - RETURN(err); -} - -int libcfs_ioctl_popdata(void *arg, void *data, int size) -{ - /* - * system call will copy out ioctl arg to user space - */ - memcpy(arg, data, size); - return 0; -} - -extern struct cfs_psdev_ops libcfs_psdev_ops; -struct libcfs_device_userstate *mdev_state[16]; - -static int -libcfs_psdev_open(dev_t dev, int flags, int devtype, struct proc *p) -{ - struct libcfs_device_userstate *mstat = NULL; - int rc = 0; - int devid; - devid = minor(dev); - - if (devid > 16) return (ENXIO); - - if (libcfs_psdev_ops.p_open != NULL) - rc = -libcfs_psdev_ops.p_open(0, &mstat); - else - rc = EPERM; - if (rc == 0) - mdev_state[devid] = mstat; - return rc; -} - -static int -libcfs_psdev_close(dev_t dev, int flags, int mode, struct proc *p) -{ - int devid; - devid = minor(dev); - int rc = 0; - - if (devid > 16) return (ENXIO); - - if (libcfs_psdev_ops.p_close != NULL) - rc = -libcfs_psdev_ops.p_close(0, 
mdev_state[devid]); - else - rc = EPERM; - if (rc == 0) - mdev_state[devid] = NULL; - return rc; -} - -static int -libcfs_ioctl (dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p) -{ - int rc = 0; - struct cfs_psdev_file pfile; - int devid; - devid = minor(dev); - - if (devid > 16) return (ENXIO); - - if (!is_suser()) - return (EPERM); - - pfile.off = 0; - pfile.private_data = mdev_state[devid]; - - if (libcfs_psdev_ops.p_ioctl != NULL) - rc = -libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); - else - rc = EPERM; - return rc; -} - -static struct cdevsw libcfs_devsw = -{ - .d_open = libcfs_psdev_open, - .d_close = libcfs_psdev_close, - .d_read = eno_rdwrt, - .d_write = eno_rdwrt, - .d_ioctl = libcfs_ioctl, - .d_stop = eno_stop, - .d_reset = eno_reset, - .d_ttys = NULL, - .d_select = eno_select, - .d_mmap = eno_mmap, - .d_strategy = eno_strat, - .d_getc = eno_getc, - .d_putc = eno_putc, - .d_type = 0 -}; - -cfs_psdev_t libcfs_dev = { - -1, - NULL, - "lnet", - &libcfs_devsw, - NULL -}; - -extern spinlock_t trace_cpu_serializer; -extern void cfs_sync_init(void); -extern void cfs_sync_fini(void); -extern int cfs_sysctl_init(void); -extern void cfs_sysctl_fini(void); -extern int cfs_mem_init(void); -extern int cfs_mem_fini(void); -extern void raw_page_death_row_clean(void); -extern void cfs_thread_agent_init(void); -extern void cfs_thread_agent_fini(void); -extern void cfs_symbol_init(void); -extern void cfs_symbol_fini(void); - -int libcfs_arch_init(void) -{ - cfs_sync_init(); - cfs_sysctl_init(); - cfs_mem_init(); - cfs_thread_agent_init(); - cfs_symbol_init(); - - spin_lock_init(&trace_cpu_serializer); - - return 0; -} - -void libcfs_arch_cleanup(void) -{ - spin_lock_done(&trace_cpu_serializer); - - cfs_symbol_fini(); - cfs_thread_agent_fini(); - cfs_mem_fini(); - cfs_sysctl_fini(); - cfs_sync_fini(); -} - diff --git a/lnet/libcfs/darwin/darwin-prim.c b/lnet/libcfs/darwin/darwin-prim.c deleted file mode 100644 index 
cdcabd94a6a35a7cc624c664d0e8e5addc821a56..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-prim.c +++ /dev/null @@ -1,581 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <mach/mach_types.h> -#include <string.h> -#include <sys/file.h> -#include <sys/conf.h> -#include <sys/uio.h> -#include <sys/filedesc.h> -#include <sys/namei.h> -#include <miscfs/devfs/devfs.h> -#include <kern/thread.h> - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * cfs pseudo device, actually pseudo char device in darwin - */ -#define KLNET_MAJOR -1 - -kern_return_t cfs_psdev_register(cfs_psdev_t *dev) { - dev->index = cdevsw_add(KLNET_MAJOR, dev->devsw); - if (dev->index < 0) { - printf("libcfs_init: failed to allocate a major number!\n"); - return KERN_FAILURE; - } - dev->handle = devfs_make_node(makedev (dev->index, 0), - DEVFS_CHAR, UID_ROOT, - GID_WHEEL, 0666, (char *)dev->name, 0); - return KERN_SUCCESS; -} - -kern_return_t cfs_psdev_deregister(cfs_psdev_t *dev) { - devfs_remove(dev->handle); - cdevsw_remove(dev->index, dev->devsw); - return KERN_SUCCESS; -} - -/* - * KPortal symbol register / unregister support - */ -struct rw_semaphore cfs_symbol_lock; -struct list_head cfs_symbol_list; - -void * -cfs_symbol_get(const char *name) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_read(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - sym->ref ++; - break; - } - } - up_read(&cfs_symbol_lock); - if (sym != NULL) - return sym->value; - return NULL; -} - -kern_return_t -cfs_symbol_put(const char *name) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_read(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - sym->ref --; - LASSERT(sym->ref >= 0); - break; - } - } - up_read(&cfs_symbol_lock); - LASSERT(sym != NULL); - - return 0; -} - -kern_return_t -cfs_symbol_register(const char 
*name, const void *value) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - struct cfs_symbol *new = NULL; - - MALLOC(new, struct cfs_symbol *, sizeof(struct cfs_symbol), M_TEMP, M_WAITOK|M_ZERO); - strncpy(new->name, name, CFS_SYMBOL_LEN); - new->value = (void *)value; - new->ref = 0; - CFS_INIT_LIST_HEAD(&new->sym_list); - - down_write(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - up_write(&cfs_symbol_lock); - FREE(new, M_TEMP); - return KERN_NAME_EXISTS; - } - - } - list_add_tail(&new->sym_list, &cfs_symbol_list); - up_write(&cfs_symbol_lock); - - return KERN_SUCCESS; -} - -kern_return_t -cfs_symbol_unregister(const char *name) -{ - struct list_head *walker; - struct list_head *nxt; - struct cfs_symbol *sym = NULL; - - down_write(&cfs_symbol_lock); - list_for_each_safe(walker, nxt, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - LASSERT(sym->ref == 0); - list_del (&sym->sym_list); - FREE(sym, M_TEMP); - break; - } - } - up_write(&cfs_symbol_lock); - - return KERN_SUCCESS; -} - -void -cfs_symbol_init() -{ - CFS_INIT_LIST_HEAD(&cfs_symbol_list); - init_rwsem(&cfs_symbol_lock); -} - -void -cfs_symbol_fini() -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_write(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - LASSERT(sym->ref == 0); - list_del (&sym->sym_list); - FREE(sym, M_TEMP); - } - up_write(&cfs_symbol_lock); - - fini_rwsem(&cfs_symbol_lock); - return; -} - -struct kernel_thread_arg -{ - spinlock_t lock; - atomic_t inuse; - cfs_thread_t func; - void *arg; -}; - -struct kernel_thread_arg cfs_thread_arg; - -#define THREAD_ARG_FREE 0 -#define THREAD_ARG_HOLD 1 -#define THREAD_ARG_RECV 2 - -#define set_targ_stat(a, v) atomic_set(&(a)->inuse, v) -#define get_targ_stat(a) 
atomic_read(&(a)->inuse) - -/* - * Hold the thread argument and set the status of thread_status - * to THREAD_ARG_HOLD, if the thread argument is held by other - * threads (It's THREAD_ARG_HOLD already), current-thread has to wait. - */ -#define thread_arg_hold(pta, _func, _arg) \ - do { \ - spin_lock(&(pta)->lock); \ - if (get_targ_stat(pta) == THREAD_ARG_FREE) { \ - set_targ_stat((pta), THREAD_ARG_HOLD); \ - (pta)->arg = (void *)_arg; \ - (pta)->func = _func; \ - spin_unlock(&(pta)->lock); \ - break; \ - } \ - spin_unlock(&(pta)->lock); \ - cfs_schedule(); \ - } while(1); \ - -/* - * Release the thread argument if the thread argument has been - * received by the child-thread (Status of thread_args is - * THREAD_ARG_RECV), otherwise current-thread has to wait. - * After release, the thread_args' status will be set to - * THREAD_ARG_FREE, and others can re-use the thread_args to - * create new kernel_thread. - */ -#define thread_arg_release(pta) \ - do { \ - spin_lock(&(pta)->lock); \ - if (get_targ_stat(pta) == THREAD_ARG_RECV) { \ - (pta)->arg = NULL; \ - (pta)->func = NULL; \ - set_targ_stat(pta, THREAD_ARG_FREE); \ - spin_unlock(&(pta)->lock); \ - break; \ - } \ - spin_unlock(&(pta)->lock); \ - cfs_schedule(); \ - } while(1) - -/* - * Receive thread argument (Used in child thread), set the status - * of thread_args to THREAD_ARG_RECV. 
- */ -#define __thread_arg_recv_fin(pta, _func, _arg, fin) \ - do { \ - spin_lock(&(pta)->lock); \ - if (get_targ_stat(pta) == THREAD_ARG_HOLD) { \ - if (fin) \ - set_targ_stat(pta, THREAD_ARG_RECV);\ - _arg = (pta)->arg; \ - _func = (pta)->func; \ - spin_unlock(&(pta)->lock); \ - break; \ - } \ - spin_unlock(&(pta)->lock); \ - cfs_schedule(); \ - } while (1); \ - -/* - * Just set the thread_args' status to THREAD_ARG_RECV - */ -#define thread_arg_fin(pta) \ - do { \ - spin_lock(&(pta)->lock); \ - assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \ - set_targ_stat(pta, THREAD_ARG_RECV); \ - spin_unlock(&(pta)->lock); \ - } while(0) - -#define thread_arg_recv(pta, f, a) __thread_arg_recv_fin(pta, f, a, 1) -#define thread_arg_keep(pta, f, a) __thread_arg_recv_fin(pta, f, a, 0) - -void -cfs_thread_agent_init(void) -{ - set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE); - spin_lock_init(&cfs_thread_arg.lock); - cfs_thread_arg.arg = NULL; - cfs_thread_arg.func = NULL; -} - -void -cfs_thread_agent_fini(void) -{ - assert(get_targ_stat(&cfs_thread_arg) == THREAD_ARG_FREE); - - spin_lock_done(&cfs_thread_arg.lock); -} - -/* - * - * All requests to create kernel thread will create a new - * thread instance of cfs_thread_agent, one by one. - * cfs_thread_agent will call the caller's thread function - * with argument supplied by caller. - */ -void -cfs_thread_agent (void) -{ - cfs_thread_t func = NULL; - void *arg = NULL; - - thread_arg_recv(&cfs_thread_arg, func, arg); - /* printf("entry of thread agent (func: %08lx).\n", (void *)func); */ - assert(func != NULL); - func(arg); - /* printf("thread agent exit. 
(func: %08lx)\n", (void *)func); */ - (void) thread_terminate(current_thread()); -} - -extern thread_t kernel_thread(task_t task, void (*start)(void)); - -int -cfs_kernel_thread(cfs_thread_t func, void *arg, int flag) -{ - int ret = 0; - thread_t th = NULL; - - thread_arg_hold(&cfs_thread_arg, func, arg); - th = kernel_thread(kernel_task, cfs_thread_agent); - thread_arg_release(&cfs_thread_arg); - if (th == THREAD_NULL) - ret = -1; - return ret; -} - -void cfs_daemonize(char *str) -{ - snprintf(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX, "%s", str); - return; -} - -/* - * XXX Liang: kexts cannot access sigmask in Darwin8. - * it's almost impossible for us to get/set signal mask - * without patching kernel. - * Should we provide these functions in xnu? - * - * These signal functions almost do nothing now, we - * need to investigate more about signal in Darwin. - */ -cfs_sigset_t cfs_get_blockedsigs() -{ - return (cfs_sigset_t)0; -} - -extern int block_procsigmask(struct proc *p, int bit); - -cfs_sigset_t cfs_block_allsigs() -{ - cfs_sigset_t old = 0; -#ifdef __DARWIN8__ -#else - block_procsigmask(current_proc(), -1); -#endif - return old; -} - -cfs_sigset_t cfs_block_sigs(sigset_t bit) -{ - cfs_sigset_t old = 0; -#ifdef __DARWIN8__ -#else - block_procsigmask(current_proc(), bit); -#endif - return old; -} - -void cfs_restore_sigs(cfs_sigset_t old) -{ -} - -int cfs_signal_pending(void) - -{ -#ifdef __DARWIN8__ - extern int thread_issignal(proc_t, thread_t, sigset_t); - return thread_issignal(current_proc(), current_thread(), (sigset_t)-1); -#else - return SHOULDissignal(current_proc(), current_uthread()) -#endif -} - -void cfs_clear_sigpending(void) -{ -#ifdef __DARWIN8__ -#else - clear_procsiglist(current_proc(), -1); -#endif -} - -#ifdef __DARWIN8__ - -#else /* !__DARWIN8__ */ - -void lustre_cone_in(boolean_t *state, funnel_t **cone) -{ - *cone = thread_funnel_get(); - if (*cone == network_flock) - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - else if (*cone 
== NULL) - *state = thread_funnel_set(kernel_flock, TRUE); -} - -void lustre_cone_ex(boolean_t state, funnel_t *cone) -{ - if (cone == network_flock) - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - else if (cone == NULL) - (void) thread_funnel_set(kernel_flock, state); -} - -void lustre_net_in(boolean_t *state, funnel_t **cone) -{ - *cone = thread_funnel_get(); - if (*cone == kernel_flock) - thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - else if (*cone == NULL) - *state = thread_funnel_set(network_flock, TRUE); -} - -void lustre_net_ex(boolean_t state, funnel_t *cone) -{ - if (cone == kernel_flock) - thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - else if (cone == NULL) - (void) thread_funnel_set(network_flock, state); -} -#endif /* !__DARWIN8__ */ - -void cfs_waitq_init(struct cfs_waitq *waitq) -{ - ksleep_chan_init(&waitq->wq_ksleep_chan); -} - -void cfs_waitlink_init(struct cfs_waitlink *link) -{ - ksleep_link_init(&link->wl_ksleep_link); -} - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link) -{ - link->wl_waitq = waitq; - ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); -} - -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link) -{ - link->wl_waitq = waitq; - link->wl_ksleep_link.flags |= KSLEEP_EXCLUSIVE; - ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); -} - -void cfs_waitq_forward(struct cfs_waitlink *link, - struct cfs_waitq *waitq) -{ - link->wl_ksleep_link.forward = &waitq->wq_ksleep_chan; -} - -void cfs_waitq_del(struct cfs_waitq *waitq, - struct cfs_waitlink *link) -{ - ksleep_del(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); -} - -int cfs_waitq_active(struct cfs_waitq *waitq) -{ - return (1); -} - -void cfs_waitq_signal(struct cfs_waitq *waitq) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - ksleep_wake(&waitq->wq_ksleep_chan); -} - -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr) -{ - ksleep_wake_nr(&waitq->wq_ksleep_chan, nr); -} - -void cfs_waitq_broadcast(struct cfs_waitq *waitq) -{ - ksleep_wake_all(&waitq->wq_ksleep_chan); -} - -void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state) -{ - ksleep_wait(&link->wl_waitq->wq_ksleep_chan, state); -} - -cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, - cfs_task_state_t state, - cfs_duration_t timeout) -{ - return ksleep_timedwait(&link->wl_waitq->wq_ksleep_chan, - state, timeout); -} - -typedef void (*ktimer_func_t)(void *); -void cfs_timer_init(cfs_timer_t *t, void (* func)(unsigned long), void *arg) -{ - ktimer_init(&t->t, (ktimer_func_t)func, arg); -} - -void cfs_timer_done(struct cfs_timer *t) -{ - ktimer_done(&t->t); -} - -void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline) -{ - ktimer_arm(&t->t, deadline); -} - -void cfs_timer_disarm(struct cfs_timer *t) -{ - ktimer_disarm(&t->t); -} - -int cfs_timer_is_armed(struct cfs_timer *t) -{ - return ktimer_is_armed(&t->t); -} - -cfs_time_t cfs_timer_deadline(struct cfs_timer *t) -{ - return ktimer_deadline(&t->t); -} - -void cfs_enter_debugger(void) -{ -#ifdef __DARWIN8__ - extern void Debugger(const char * reason); - Debugger("CFS"); -#else - extern void PE_enter_debugger(char *cause); - PE_enter_debugger("CFS"); -#endif -} - -int cfs_online_cpus(void) -{ - int activecpu; - size_t size; - -#ifdef __DARWIN8__ - size = sizeof(int); - sysctlbyname("hw.activecpu", &activecpu, &size, NULL, 0); - return activecpu; -#else - host_basic_info_data_t hinfo; - kern_return_t kret; - int count = HOST_BASIC_INFO_COUNT; -#define BSD_HOST 1 - kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count); - if (kret == KERN_SUCCESS) - return (hinfo.avail_cpus); - return(-EINVAL); -#endif -} - -int cfs_ncpus(void) -{ - int ncpu; - size_t size; - - size = sizeof(int); - - sysctlbyname("hw.ncpu", &ncpu, &size, NULL, 0); - 
return ncpu; -} diff --git a/lnet/libcfs/darwin/darwin-proc.c b/lnet/libcfs/darwin/darwin-proc.c deleted file mode 100644 index a38902aef1215ac2f2ec2d8dc30f51aa3ee7bd02..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-proc.c +++ /dev/null @@ -1,384 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <sys/param.h> -#include <sys/kernel.h> -#include <sys/malloc.h> -#include <sys/systm.h> -#include <sys/sysctl.h> -#include <sys/proc.h> -#include <sys/unistd.h> -#include <mach/mach_types.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - -#define LIBCFS_SYSCTL "libcfs" -#define LIBCFS_SYSCTL_SPRITE "sprite" -#define LIBCFS_SYSCTL_MAGIC 0xbabeface - -static struct libcfs_sysctl_sprite { - int ss_magic; - struct sysctl_oid_list *ss_link; -} libcfs_sysctl_sprite = { 0, NULL }; - -static cfs_sysctl_table_header_t *libcfs_table_header = NULL; -extern unsigned int libcfs_debug; -extern unsigned int libcfs_subsystem_debug; -extern unsigned int libcfs_printk; -extern unsigned int libcfs_console_ratelimit; -extern unsigned int libcfs_catastrophe; -extern atomic_t libcfs_kmemory; - -extern long max_debug_mb; -extern int cfs_trace_daemon SYSCTL_HANDLER_ARGS; -extern int cfs_debug_mb SYSCTL_HANDLER_ARGS; -/* - * sysctl table for lnet - */ - -SYSCTL_NODE (, OID_AUTO, lnet, CTLFLAG_RW, - 0, "lnet sysctl top"); - -SYSCTL_INT(_lnet, OID_AUTO, debug, - CTLTYPE_INT | CTLFLAG_RW , &libcfs_debug, - 0, "debug"); -SYSCTL_INT(_lnet, OID_AUTO, subsystem_debug, - CTLTYPE_INT | CTLFLAG_RW, &libcfs_subsystem_debug, - 0, "subsystem debug"); -SYSCTL_INT(_lnet, OID_AUTO, printk, - CTLTYPE_INT | CTLFLAG_RW, &libcfs_printk, - 0, "printk"); -SYSCTL_INT(_lnet, OID_AUTO, console_ratelimit, - CTLTYPE_INT | CTLFLAG_RW, &libcfs_console_ratelimit, - 0, "console_ratelimit"); -SYSCTL_STRING(_lnet, OID_AUTO, debug_path, - CTLTYPE_STRING | CTLFLAG_RW, debug_file_path, - 1024, "debug path"); -SYSCTL_INT(_lnet, OID_AUTO, memused, - CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_kmemory.counter, - 0, "memused"); -SYSCTL_INT(_lnet, OID_AUTO, catastrophe, - CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_catastrophe, - 0, "catastrophe"); -SYSCTL_PROC(_lnet, OID_AUTO, trace_daemon, - CTLTYPE_STRING | CTLFLAG_RW, 0, - 0, &cfs_trace_daemon, "A", "trace daemon"); -SYSCTL_PROC(_lnet, OID_AUTO, 
debug_mb, - CTLTYPE_INT | CTLFLAG_RW, &max_debug_mb, - 0, &cfs_debug_mb, "L", "max debug size"); - - -static cfs_sysctl_table_t top_table[] = { - &sysctl__lnet, - &sysctl__lnet_debug, - &sysctl__lnet_subsystem_debug, - &sysctl__lnet_printk, - &sysctl__lnet_console_ratelimit, - &sysctl__lnet_debug_path, - &sysctl__lnet_memused, - &sysctl__lnet_catastrophe, - &sysctl__lnet_trace_daemon, - &sysctl__lnet_debug_mb, - NULL -}; - -/* - * Register sysctl table - */ -cfs_sysctl_table_header_t * -cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg) -{ - cfs_sysctl_table_t item; - int i = 0; - - while ((item = table[i++]) != NULL) - sysctl_register_oid(item); - return table; -} - -/* - * Unregister sysctl table - */ -void -cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table) { - int i = 0; - cfs_sysctl_table_t item; - - while ((item = table[i++]) != NULL) - sysctl_unregister_oid(item); - return; -} - -/* - * Allocate a sysctl oid. - */ -static struct sysctl_oid * -cfs_alloc_sysctl(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, void *arg1, int arg2, const char *fmt, - int (*handler) SYSCTL_HANDLER_ARGS) -{ - struct sysctl_oid *oid; - char *sname = NULL; - char *sfmt = NULL; - - if (strlen(name) + 1 > CTL_MAXNAME) { - printf("libcfs: sysctl name: %s is too long.\n", name); - return NULL; - } - oid = (struct sysctl_oid*)_MALLOC(sizeof(struct sysctl_oid), - M_TEMP, M_WAITOK | M_ZERO); - if (oid == NULL) - return NULL; - - sname = (char *)_MALLOC(sizeof(CTL_MAXNAME), - M_TEMP, M_WAITOK | M_ZERO); - if (sname == NULL) - goto error; - strcpy(sname, name); - - sfmt = (char *)_MALLOC(4, M_TEMP, M_WAITOK | M_ZERO); - if (sfmt == NULL) - goto error; - strcpy(sfmt, fmt); - - if (parent == NULL) - oid->oid_parent = &sysctl__children; - else - oid->oid_parent = parent; - oid->oid_number = nbr; - oid->oid_kind = access; - oid->oid_name = sname; - oid->oid_handler = handler; - oid->oid_fmt = sfmt; - - if ((access & CTLTYPE) == CTLTYPE_NODE){ - /* 
It's a sysctl node */ - struct sysctl_oid_list *link; - - link = (struct sysctl_oid_list *)_MALLOC(sizeof(struct sysctl_oid_list), - M_TEMP, M_WAITOK | M_ZERO); - if (link == NULL) - goto error; - oid->oid_arg1 = link; - oid->oid_arg2 = 0; - } else { - oid->oid_arg1 = arg1; - oid->oid_arg2 = arg2; - } - - return oid; -error: - if (sfmt != NULL) - _FREE(sfmt, M_TEMP); - if (sname != NULL) - _FREE(sname, M_TEMP); - if (oid != NULL) - _FREE(oid, M_TEMP); - return NULL; -} - -void cfs_free_sysctl(struct sysctl_oid *oid) -{ - if (oid->oid_name != NULL) - _FREE((void *)oid->oid_name, M_TEMP); - if (oid->oid_fmt != NULL) - _FREE((void *)oid->oid_fmt, M_TEMP); - if ((oid->oid_kind & CTLTYPE_NODE != 0) && oid->oid_arg1) - /* XXX Liang: need to assert the list is empty */ - _FREE(oid->oid_arg1, M_TEMP); - _FREE(oid, M_TEMP); -} - -#define CFS_SYSCTL_ISVALID ((libcfs_sysctl_sprite.ss_magic == LIBCFS_SYSCTL_MAGIC) && \ - (libcfs_sysctl_sprite.ss_link != NULL)) - -int -cfs_sysctl_isvalid(void) -{ - return CFS_SYSCTL_ISVALID; -} - -struct sysctl_oid * -cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int (*handler) SYSCTL_HANDLER_ARGS) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_NODE | access, name, - NULL, 0, "N", handler); -} - -struct sysctl_oid * -cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int *ptr, int val) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name, - ptr, val, "I", sysctl_handle_int); -} - -struct sysctl_oid * -cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, int *ptr, int val) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name, - 
ptr, val, "L", sysctl_handle_long); -} - -struct sysctl_oid * -cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, char *ptr, int len) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_STRING | access, name, - ptr, len, "A", sysctl_handle_string); -} - -struct sysctl_oid * -cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access, - const char *name, void *ptr, int size) -{ - if (parent == NULL && CFS_SYSCTL_ISVALID) - parent = libcfs_sysctl_sprite.ss_link; - return cfs_alloc_sysctl(parent, nbr, CTLTYPE_OPAQUE | access, name, - ptr, size, "S", sysctl_handle_opaque); -} - -/* no proc in osx */ -cfs_proc_dir_entry_t * -cfs_create_proc_entry(char *name, int mod, cfs_proc_dir_entry_t *parent) -{ - cfs_proc_dir_entry_t *entry; - MALLOC(entry, cfs_proc_dir_entry_t *, sizeof(cfs_proc_dir_entry_t), M_TEMP, M_WAITOK|M_ZERO); - - return entry; -} - -void -cfs_free_proc_entry(cfs_proc_dir_entry_t *de){ - FREE(de, M_TEMP); - return; -}; - -void -cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry) -{ - cfs_free_proc_entry(entry); - return; -} - -int -insert_proc(void) -{ -#if 1 - if (!libcfs_table_header) - libcfs_table_header = cfs_register_sysctl_table(top_table, 0); -#endif - return 0; -} - -void -remove_proc(void) -{ -#if 1 - if (libcfs_table_header != NULL) - cfs_unregister_sysctl_table(libcfs_table_header); - libcfs_table_header = NULL; -#endif - return; -} - -int -cfs_sysctl_init(void) -{ - struct sysctl_oid *oid_root; - struct sysctl_oid *oid_sprite; - struct libcfs_sysctl_sprite *sprite; - size_t len; - int rc; - - len = sizeof(struct libcfs_sysctl_sprite); - rc = sysctlbyname("libcfs.sprite", - (void *)&libcfs_sysctl_sprite, &len, NULL, 0); - if (rc == 0) { - /* - * XXX Liang: assert (rc == 0 || rc == ENOENT) - * - * libcfs.sprite has been registered by previous - * loading of libcfs - */ - if 
(libcfs_sysctl_sprite.ss_magic != LIBCFS_SYSCTL_MAGIC) { - printf("libcfs: magic number of libcfs.sprite " - "is not right (%lx, %lx)\n", - libcfs_sysctl_sprite.ss_magic, - LIBCFS_SYSCTL_MAGIC); - return -1; - } - assert(libcfs_sysctl_sprite.ss_link != NULL); - printf("libcfs: registered libcfs.sprite found.\n"); - return 0; - } - oid_root = cfs_alloc_sysctl_node(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, - LIBCFS_SYSCTL, 0); - if (oid_root == NULL) - return -1; - sysctl_register_oid(oid_root); - - sprite = (struct libcfs_sysctl_sprite *)_MALLOC(sizeof(struct libcfs_sysctl_sprite), - M_TEMP, M_WAITOK | M_ZERO); - if (sprite == NULL) { - sysctl_unregister_oid(oid_root); - cfs_free_sysctl(oid_root); - return -1; - } - sprite->ss_magic = LIBCFS_SYSCTL_MAGIC; - sprite->ss_link = (struct sysctl_oid_list *)oid_root->oid_arg1; - oid_sprite = cfs_alloc_sysctl_struct((struct sysctl_oid_list *)oid_root->oid_arg1, - OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, - LIBCFS_SYSCTL_SPRITE, sprite, - sizeof(struct libcfs_sysctl_sprite)); - if (oid_sprite == NULL) { - cfs_free_sysctl(oid_sprite); - sysctl_unregister_oid(oid_root); - cfs_free_sysctl(oid_root); - return -1; - } - sysctl_register_oid(oid_sprite); - - libcfs_sysctl_sprite.ss_magic = sprite->ss_magic; - libcfs_sysctl_sprite.ss_link = sprite->ss_link; - - return 0; -} - -void -cfs_sysctl_fini(void) -{ - libcfs_sysctl_sprite.ss_magic = 0; - libcfs_sysctl_sprite.ss_link = NULL; -} - diff --git a/lnet/libcfs/darwin/darwin-sync.c b/lnet/libcfs/darwin/darwin-sync.c deleted file mode 100644 index dc2af0ffea36981c09f76f4115c15f4bbb1bee20..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-sync.c +++ /dev/null @@ -1,1025 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light Super operations - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -/* - * xnu_sync.c - * - * Created by nikita on Sun Jul 18 2004. - * - * XNU synchronization primitives. - */ - -/* - * This file contains very simplistic implementations of (saner) API for - * basic synchronization primitives: - * - * - spin-lock (kspin) - * - * - semaphore (ksem) - * - * - mutex (kmut) - * - * - condition variable (kcond) - * - * - wait-queue (ksleep_chan and ksleep_link) - * - * - timer (ktimer) - * - * A lot can be optimized here. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#ifdef __DARWIN8__ -# include <kern/locks.h> -#else -# include <mach/mach_types.h> -# include <sys/types.h> -# include <kern/simple_lock.h> -#endif - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#define SLASSERT(e) ON_SYNC_DEBUG(LASSERT(e)) - -#ifdef HAVE_GET_PREEMPTION_LEVEL -extern int get_preemption_level(void); -#else -#define get_preemption_level() (0) -#endif - -#if SMP -#ifdef __DARWIN8__ - -static lck_grp_t *cfs_lock_grp = NULL; -#warning "Verify definition of lck_spin_t hasn't been changed while building!" 
- -/* hw_lock_* are not exported by Darwin8 */ -static inline void xnu_spin_init(xnu_spin_t *s) -{ - SLASSERT(cfs_lock_grp != NULL); - //*s = lck_spin_alloc_init(cfs_lock_grp, LCK_ATTR_NULL); - lck_spin_init((lck_spin_t *)s, cfs_lock_grp, LCK_ATTR_NULL); -} - -static inline void xnu_spin_done(xnu_spin_t *s) -{ - SLASSERT(cfs_lock_grp != NULL); - //lck_spin_free(*s, cfs_lock_grp); - //*s = NULL; - lck_spin_destroy((lck_spin_t *)s, cfs_lock_grp); -} - -#define xnu_spin_lock(s) lck_spin_lock((lck_spin_t *)(s)) -#define xnu_spin_unlock(s) lck_spin_unlock((lck_spin_t *)(s)) - -#warning "Darwin8 does not export lck_spin_try_lock" -#define xnu_spin_try(s) (1) - -#else /* DARWIN8 */ -extern void hw_lock_init(hw_lock_t); -extern void hw_lock_lock(hw_lock_t); -extern void hw_lock_unlock(hw_lock_t); -extern unsigned int hw_lock_to(hw_lock_t, unsigned int); -extern unsigned int hw_lock_try(hw_lock_t); -extern unsigned int hw_lock_held(hw_lock_t); - -#define xnu_spin_init(s) hw_lock_init(s) -#define xnu_spin_done(s) do {} while (0) -#define xnu_spin_lock(s) hw_lock_lock(s) -#define xnu_spin_unlock(s) hw_lock_unlock(s) -#define xnu_spin_try(s) hw_lock_try(s) -#endif /* DARWIN8 */ - -#else /* SMP */ -#define xnu_spin_init(s) do {} while (0) -#define xnu_spin_done(s) do {} while (0) -#define xnu_spin_lock(s) do {} while (0) -#define xnu_spin_unlock(s) do {} while (0) -#define xnu_spin_try(s) (1) -#endif /* SMP */ - -/* - * Warning: low level libcfs debugging code (libcfs_debug_msg(), for - * example), uses spin-locks, so debugging output here may lead to nasty - * surprises. - * - * In uniprocessor version of spin-lock. Only checks. 
- */ - -void kspin_init(struct kspin *spin) -{ - SLASSERT(spin != NULL); - xnu_spin_init(&spin->lock); - ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC); - ON_SYNC_DEBUG(spin->owner = NULL); -} - -void kspin_done(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - SLASSERT(spin->owner == NULL); - xnu_spin_done(&spin->lock); -} - -void kspin_lock(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - SLASSERT(spin->owner != current_thread()); - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - xnu_spin_lock(&spin->lock); - SLASSERT(spin->owner == NULL); - ON_SYNC_DEBUG(spin->owner = current_thread()); -} - -void kspin_unlock(struct kspin *spin) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - SLASSERT(spin->owner == current_thread()); - ON_SYNC_DEBUG(spin->owner = NULL); - xnu_spin_unlock(&spin->lock); -} - -int kspin_trylock(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - - if (xnu_spin_try(&spin->lock)) { - SLASSERT(spin->owner == NULL); - ON_SYNC_DEBUG(spin->owner = current_thread()); - return 1; - } else - return 0; -} - -#if XNU_SYNC_DEBUG -int kspin_islocked(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - return spin->owner == current_thread(); -} - -int kspin_isnotlocked(struct kspin *spin) -{ - SLASSERT(spin != NULL); - SLASSERT(spin->magic == KSPIN_MAGIC); - return spin->owner != current_thread(); -} -#endif - -/* - * read/write spin-lock - */ -void krw_spin_init(struct krw_spin *rwspin) -{ - SLASSERT(rwspin != NULL); - - kspin_init(&rwspin->guard); - rwspin->count = 0; - ON_SYNC_DEBUG(rwspin->magic = KRW_SPIN_MAGIC); -} - -void krw_spin_done(struct krw_spin *rwspin) -{ - 
SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - SLASSERT(rwspin->count == 0); - kspin_done(&rwspin->guard); -} - -void krw_spin_down_r(struct krw_spin *rwspin) -{ - int i; - SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - - kspin_lock(&rwspin->guard); - while(rwspin->count < 0) { - i = -1; - kspin_unlock(&rwspin->guard); - while (--i != 0 && rwspin->count < 0) - continue; - kspin_lock(&rwspin->guard); - } - ++ rwspin->count; - kspin_unlock(&rwspin->guard); -} - -void krw_spin_down_w(struct krw_spin *rwspin) -{ - int i; - SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - - kspin_lock(&rwspin->guard); - while (rwspin->count != 0) { - i = -1; - kspin_unlock(&rwspin->guard); - while (--i != 0 && rwspin->count != 0) - continue; - kspin_lock(&rwspin->guard); - } - rwspin->count = -1; - kspin_unlock(&rwspin->guard); -} - -void krw_spin_up_r(struct krw_spin *rwspin) -{ - SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - SLASSERT(rwspin->count > 0); - - kspin_lock(&rwspin->guard); - -- rwspin->count; - kspin_unlock(&rwspin->guard); -} - -void krw_spin_up_w(struct krw_spin *rwspin) -{ - SLASSERT(rwspin != NULL); - SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); - SLASSERT(rwspin->count == -1); - - kspin_lock(&rwspin->guard); - rwspin->count = 0; - kspin_unlock(&rwspin->guard); -} - -/* - * semaphore - */ -#ifdef __DARWIN8__ - -#define xnu_waitq_init(q, a) do {} while (0) -#define xnu_waitq_done(q) do {} while (0) -#define xnu_waitq_wakeup_one(q, e, s) ({wakeup_one((void *)(e)); KERN_SUCCESS;}) -#define xnu_waitq_wakeup_all(q, e, s) ({wakeup((void *)(e)); KERN_SUCCESS;}) -#define xnu_waitq_assert_wait(q, e, s) assert_wait((e), s) - -#else /* DARWIN8 */ - -#define xnu_waitq_init(q, a) wait_queue_init((q), a) -#define xnu_waitq_done(q) do {} while (0) -#define xnu_waitq_wakeup_one(q, e, s) wait_queue_wakeup_one((q), (event_t)(e), s) -#define xnu_waitq_wakeup_all(q, e, s) 
wait_queue_wakeup_all((q), (event_t)(e), s) -#define xnu_waitq_assert_wait(q, e, s) wait_queue_assert_wait((q), (event_t)(e), s) - -#endif /* DARWIN8 */ -void ksem_init(struct ksem *sem, int value) -{ - SLASSERT(sem != NULL); - kspin_init(&sem->guard); - xnu_waitq_init(&sem->q, SYNC_POLICY_FIFO); - sem->value = value; - ON_SYNC_DEBUG(sem->magic = KSEM_MAGIC); -} - -void ksem_done(struct ksem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KSEM_MAGIC); - /* - * XXX nikita: cannot check that &sem->q is empty because - * wait_queue_empty() is Apple private API. - */ - kspin_done(&sem->guard); -} - -int ksem_up(struct ksem *sem, int value) -{ - int result; - - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KSEM_MAGIC); - SLASSERT(value >= 0); - - kspin_lock(&sem->guard); - sem->value += value; - if (sem->value == 0) - result = xnu_waitq_wakeup_one(&sem->q, sem, - THREAD_AWAKENED); - else - result = xnu_waitq_wakeup_all(&sem->q, sem, - THREAD_AWAKENED); - kspin_unlock(&sem->guard); - SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING); - return (result == KERN_SUCCESS) ? 
0 : 1; -} - -void ksem_down(struct ksem *sem, int value) -{ - int result; - - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KSEM_MAGIC); - SLASSERT(value >= 0); - SLASSERT(get_preemption_level() == 0); - - kspin_lock(&sem->guard); - while (sem->value < value) { - result = xnu_waitq_assert_wait(&sem->q, sem, - THREAD_UNINT); - SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); - kspin_unlock(&sem->guard); - if (result == THREAD_WAITING) - thread_block(THREAD_CONTINUE_NULL); - kspin_lock(&sem->guard); - } - sem->value -= value; - kspin_unlock(&sem->guard); -} - -int ksem_trydown(struct ksem *sem, int value) -{ - int result; - - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KSEM_MAGIC); - SLASSERT(value >= 0); - - kspin_lock(&sem->guard); - if (sem->value >= value) { - sem->value -= value; - result = 0; - } else - result = -EBUSY; - kspin_unlock(&sem->guard); - return result; -} - -void kmut_init(struct kmut *mut) -{ - SLASSERT(mut != NULL); - ksem_init(&mut->s, 1); - ON_SYNC_DEBUG(mut->magic = KMUT_MAGIC); - ON_SYNC_DEBUG(mut->owner = NULL); -} - -void kmut_done(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - SLASSERT(mut->owner == NULL); - ksem_done(&mut->s); -} - -void kmut_lock(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - SLASSERT(mut->owner != current_thread()); - SLASSERT(get_preemption_level() == 0); - - ksem_down(&mut->s, 1); - ON_SYNC_DEBUG(mut->owner = current_thread()); -} - -void kmut_unlock(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - SLASSERT(mut->owner == current_thread()); - - ON_SYNC_DEBUG(mut->owner = NULL); - ksem_up(&mut->s, 1); -} - -int kmut_trylock(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - return ksem_trydown(&mut->s, 1); -} - -#if XNU_SYNC_DEBUG -int kmut_islocked(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - return mut->owner 
== current_thread(); -} - -int kmut_isnotlocked(struct kmut *mut) -{ - SLASSERT(mut != NULL); - SLASSERT(mut->magic == KMUT_MAGIC); - return mut->owner != current_thread(); -} -#endif - - -void kcond_init(struct kcond *cond) -{ - SLASSERT(cond != NULL); - - kspin_init(&cond->guard); - cond->waiters = NULL; - ON_SYNC_DEBUG(cond->magic = KCOND_MAGIC); -} - -void kcond_done(struct kcond *cond) -{ - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(cond->waiters == NULL); - kspin_done(&cond->guard); -} - -void kcond_wait(struct kcond *cond, struct kspin *lock) -{ - struct kcond_link link; - - SLASSERT(cond != NULL); - SLASSERT(lock != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(kspin_islocked(lock)); - - ksem_init(&link.sem, 0); - kspin_lock(&cond->guard); - link.next = cond->waiters; - cond->waiters = &link; - kspin_unlock(&cond->guard); - kspin_unlock(lock); - - ksem_down(&link.sem, 1); - - kspin_lock(&cond->guard); - kspin_unlock(&cond->guard); - kspin_lock(lock); -} - -void kcond_wait_guard(struct kcond *cond) -{ - struct kcond_link link; - - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(kspin_islocked(&cond->guard)); - - ksem_init(&link.sem, 0); - link.next = cond->waiters; - cond->waiters = &link; - kspin_unlock(&cond->guard); - - ksem_down(&link.sem, 1); - - kspin_lock(&cond->guard); -} - -void kcond_signal_guard(struct kcond *cond) -{ - struct kcond_link *link; - - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(kspin_islocked(&cond->guard)); - - link = cond->waiters; - if (link != NULL) { - cond->waiters = link->next; - ksem_up(&link->sem, 1); - } -} - -void kcond_signal(struct kcond *cond) -{ - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - - kspin_lock(&cond->guard); - kcond_signal_guard(cond); - kspin_unlock(&cond->guard); -} - -void kcond_broadcast_guard(struct kcond *cond) -{ - struct kcond_link *link; - - SLASSERT(cond != NULL); - 
SLASSERT(cond->magic == KCOND_MAGIC); - SLASSERT(kspin_islocked(&cond->guard)); - - for (link = cond->waiters; link != NULL; link = link->next) - ksem_up(&link->sem, 1); - cond->waiters = NULL; -} - -void kcond_broadcast(struct kcond *cond) -{ - SLASSERT(cond != NULL); - SLASSERT(cond->magic == KCOND_MAGIC); - - kspin_lock(&cond->guard); - kcond_broadcast_guard(cond); - kspin_unlock(&cond->guard); -} - -void krw_sem_init(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - - kcond_init(&sem->cond); - sem->count = 0; - ON_SYNC_DEBUG(sem->magic = KRW_MAGIC); -} - -void krw_sem_done(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - SLASSERT(sem->count == 0); - kcond_done(&sem->cond); -} - -void krw_sem_down_r(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - SLASSERT(get_preemption_level() == 0); - - kspin_lock(&sem->cond.guard); - while (sem->count < 0) - kcond_wait_guard(&sem->cond); - ++ sem->count; - kspin_unlock(&sem->cond.guard); -} - -int krw_sem_down_r_try(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - - kspin_lock(&sem->cond.guard); - if (sem->count < 0) { - kspin_unlock(&sem->cond.guard); - return -EBUSY; - } - ++ sem->count; - kspin_unlock(&sem->cond.guard); - return 0; -} - -void krw_sem_down_w(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - SLASSERT(get_preemption_level() == 0); - - kspin_lock(&sem->cond.guard); - while (sem->count != 0) - kcond_wait_guard(&sem->cond); - sem->count = -1; - kspin_unlock(&sem->cond.guard); -} - -int krw_sem_down_w_try(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - - kspin_lock(&sem->cond.guard); - if (sem->count != 0) { - kspin_unlock(&sem->cond.guard); - return -EBUSY; - } - sem->count = -1; - kspin_unlock(&sem->cond.guard); - return 0; -} - -void krw_sem_up_r(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic 
== KRW_MAGIC); - SLASSERT(sem->count > 0); - - kspin_lock(&sem->cond.guard); - -- sem->count; - if (sem->count == 0) - kcond_broadcast_guard(&sem->cond); - kspin_unlock(&sem->cond.guard); -} - -void krw_sem_up_w(struct krw_sem *sem) -{ - SLASSERT(sem != NULL); - SLASSERT(sem->magic == KRW_MAGIC); - SLASSERT(sem->count == -1); - - kspin_lock(&sem->cond.guard); - sem->count = 0; - kspin_unlock(&sem->cond.guard); - kcond_broadcast(&sem->cond); -} - -void ksleep_chan_init(struct ksleep_chan *chan) -{ - SLASSERT(chan != NULL); - - kspin_init(&chan->guard); - CFS_INIT_LIST_HEAD(&chan->waiters); - ON_SYNC_DEBUG(chan->magic = KSLEEP_CHAN_MAGIC); -} - -void ksleep_chan_done(struct ksleep_chan *chan) -{ - SLASSERT(chan != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(list_empty(&chan->waiters)); - kspin_done(&chan->guard); -} - -void ksleep_link_init(struct ksleep_link *link) -{ - SLASSERT(link != NULL); - - CFS_INIT_LIST_HEAD(&link->linkage); - link->flags = 0; - link->event = current_thread(); - link->hits = 0; - link->forward = NULL; - ON_SYNC_DEBUG(link->magic = KSLEEP_LINK_MAGIC); -} - -void ksleep_link_done(struct ksleep_link *link) -{ - SLASSERT(link != NULL); - SLASSERT(link->magic == KSLEEP_LINK_MAGIC); - SLASSERT(list_empty(&link->linkage)); -} - -void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link) -{ - SLASSERT(chan != NULL); - SLASSERT(link != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(link->magic == KSLEEP_LINK_MAGIC); - SLASSERT(list_empty(&link->linkage)); - - kspin_lock(&chan->guard); - if (link->flags & KSLEEP_EXCLUSIVE) - list_add_tail(&link->linkage, &chan->waiters); - else - list_add(&link->linkage, &chan->waiters); - kspin_unlock(&chan->guard); -} - -void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link) -{ - SLASSERT(chan != NULL); - SLASSERT(link != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(link->magic == KSLEEP_LINK_MAGIC); - - kspin_lock(&chan->guard); - 
list_del_init(&link->linkage); - kspin_unlock(&chan->guard); -} - -static int has_hits(struct ksleep_chan *chan, event_t event) -{ - struct ksleep_link *scan; - - SLASSERT(kspin_islocked(&chan->guard)); - list_for_each_entry(scan, &chan->waiters, linkage) { - if (scan->event == event && scan->hits > 0) { - /* consume hit */ - -- scan->hits; - return 1; - } - } - return 0; -} - -static void add_hit(struct ksleep_chan *chan, event_t event) -{ - struct ksleep_link *scan; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - SLASSERT(kspin_islocked(&chan->guard)); - list_for_each_entry(scan, &chan->waiters, linkage) { - if (scan->event == event) { - ++ scan->hits; - break; - } - } -} - -void ksleep_wait(struct ksleep_chan *chan, cfs_task_state_t state) -{ - event_t event; - int result; - - ENTRY; - - SLASSERT(chan != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(get_preemption_level() == 0); - - event = current_thread(); - kspin_lock(&chan->guard); - if (!has_hits(chan, event)) { - result = assert_wait(event, state); - kspin_unlock(&chan->guard); - SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); - if (result == THREAD_WAITING) - thread_block(THREAD_CONTINUE_NULL); - } else - kspin_unlock(&chan->guard); - EXIT; -} - -/* - * Sleep on @chan for no longer than @timeout nano-seconds. Return remaining - * sleep time (non-zero only if thread was waken by a signal (not currently - * implemented), or waitq was already in the "signalled" state). 
- */ -int64_t ksleep_timedwait(struct ksleep_chan *chan, - cfs_task_state_t state, - uint64_t timeout) -{ - event_t event; - - ENTRY; - - SLASSERT(chan != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - SLASSERT(get_preemption_level() == 0); - - event = current_thread(); - kspin_lock(&chan->guard); - if (!has_hits(chan, event)) { - int result; - uint64_t expire; - result = assert_wait(event, state); - if (timeout > 0) { - /* - * arm a timer. thread_set_timer()'s first argument is - * uint32_t, so we have to cook deadline ourselves. - */ - nanoseconds_to_absolutetime(timeout, &expire); - clock_absolutetime_interval_to_deadline(expire, &expire); - thread_set_timer_deadline(expire); - } - kspin_unlock(&chan->guard); - SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); - if (result == THREAD_WAITING) - result = thread_block(THREAD_CONTINUE_NULL); - thread_cancel_timer(); - - if (result == THREAD_TIMED_OUT) - timeout = 0; - else { - uint64_t now; - clock_get_uptime(&now); - if (expire > now) - absolutetime_to_nanoseconds(expire - now, &timeout); - else - timeout = 0; - } - } else { - /* just return timeout, because I've got event and don't need to wait */ - kspin_unlock(&chan->guard); - } - - RETURN(timeout); -} - -/* - * wake up single exclusive waiter (plus some arbitrary number of * - * non-exclusive) - */ -void ksleep_wake(struct ksleep_chan *chan) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - ksleep_wake_nr(chan, 1); -} - -/* - * wake up all waiters on @chan - */ -void ksleep_wake_all(struct ksleep_chan *chan) -{ - ENTRY; - ksleep_wake_nr(chan, 0); - EXIT; -} - -/* - * wakeup no more than @nr exclusive waiters from @chan, plus some arbitrary - * number of non-exclusive. If @nr is 0, wake up all waiters. 
- */ -void ksleep_wake_nr(struct ksleep_chan *chan, int nr) -{ - struct ksleep_link *scan; - int result; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - SLASSERT(chan != NULL); - SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); - - kspin_lock(&chan->guard); - list_for_each_entry(scan, &chan->waiters, linkage) { - struct ksleep_chan *forward; - - forward = scan->forward; - if (forward != NULL) - kspin_lock(&forward->guard); - result = thread_wakeup(scan->event); - SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING); - if (result == KERN_NOT_WAITING) { - ++ scan->hits; - if (forward != NULL) - add_hit(forward, scan->event); - } - if (forward != NULL) - kspin_unlock(&forward->guard); - if ((scan->flags & KSLEEP_EXCLUSIVE) && --nr == 0) - break; - } - kspin_unlock(&chan->guard); -} - -void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg) -{ - SLASSERT(t != NULL); - SLASSERT(func != NULL); - - kspin_init(&t->guard); - t->func = func; - t->arg = arg; - ON_SYNC_DEBUG(t->magic = KTIMER_MAGIC); -} - -void ktimer_done(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - kspin_done(&t->guard); - ON_SYNC_DEBUG(t->magic = 0); -} - -static void ktimer_actor(void *arg0, void *arg1) -{ - struct ktimer *t; - int armed; - - t = arg0; - /* - * this assumes that ktimer's are never freed. 
- */ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - /* - * call actual timer function - */ - kspin_lock(&t->guard); - armed = t->armed; - t->armed = 0; - kspin_unlock(&t->guard); - - if (armed) - t->func(t->arg); -} - -extern boolean_t thread_call_func_cancel(thread_call_func_t, thread_call_param_t, boolean_t); -extern void thread_call_func_delayed(thread_call_func_t, thread_call_param_t, uint64_t); - -static void ktimer_disarm_locked(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - thread_call_func_cancel(ktimer_actor, t, FALSE); -} - -/* - * Received deadline is nanoseconds, but time checked by - * thread_call is absolute time (The abstime unit is equal to - * the length of one bus cycle, so the duration is dependent - * on the bus speed of the computer), so we need to convert - * nanotime to abstime by nanoseconds_to_absolutetime(). - * - * Refer to _delayed_call_timer(...) - * - * if thread_call_func_delayed is not exported in the future, - * we can use timeout() or bsd_timeout() to replace it. - */ -void ktimer_arm(struct ktimer *t, u_int64_t deadline) -{ - cfs_time_t abstime; - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - kspin_lock(&t->guard); - ktimer_disarm_locked(t); - t->armed = 1; - nanoseconds_to_absolutetime(deadline, &abstime); - thread_call_func_delayed(ktimer_actor, t, deadline); - kspin_unlock(&t->guard); -} - -void ktimer_disarm(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - kspin_lock(&t->guard); - t->armed = 0; - ktimer_disarm_locked(t); - kspin_unlock(&t->guard); -} - -int ktimer_is_armed(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - /* - * no locking---result is only a hint anyway. 
- */ - return t->armed; -} - -u_int64_t ktimer_deadline(struct ktimer *t) -{ - SLASSERT(t != NULL); - SLASSERT(t->magic == KTIMER_MAGIC); - - return t->deadline; -} - -void cfs_sync_init(void) -{ -#ifdef __DARWIN8__ - /* Initialize lock group */ - cfs_lock_grp = lck_grp_alloc_init("libcfs sync", LCK_GRP_ATTR_NULL); -#endif -} - -void cfs_sync_fini(void) -{ -#ifdef __DARWIN8__ - /* - * XXX Liang: destroy lock group. As we haven't called lock_done - * for all locks, cfs_lock_grp may not be freed by kernel(reference - * count > 1). - */ - lck_grp_free(cfs_lock_grp); - cfs_lock_grp = NULL; -#endif -} -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/darwin/darwin-tcpip.c b/lnet/libcfs/darwin/darwin-tcpip.c deleted file mode 100644 index c6609a78d6452f47a7e5e08c09e0c1285233795a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-tcpip.c +++ /dev/null @@ -1,1339 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * Darwin porting library - * Make things easy to port - */ - -#include <mach/mach_types.h> -#include <sys/file.h> -#include <sys/mount.h> -#include <string.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <sys/socket.h> -#include <sys/socketvar.h> -#include <sys/sockio.h> -#include <sys/protosw.h> -#include <net/if.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -static __inline__ struct sockaddr_in -blank_sin() -{ - struct sockaddr_in blank = { sizeof(struct sockaddr_in), AF_INET }; - return (blank); -} - -void -libcfs_ipif_free_enumeration (char **names, int n) -{ - int i; - - LASSERT (n > 0); - - for (i = 0; i < n && names[i] != NULL; i++) - LIBCFS_FREE(names[i], IFNAMSIZ); - - LIBCFS_FREE(names, n * sizeof(*names)); -} - -#ifdef __DARWIN8__ -/* - * Darwin 8.x - * - * No hack kernel structre, all using KPI. - */ - -int -libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct ifreq ifr; - socket_t so; - __u32 val; - int nob; - int rc; - - rc = -sock_socket(PF_INET, SOCK_STREAM, 0, - NULL, NULL, &so); - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return rc; - } - - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - rc = -EINVAL; - goto out; - } - - CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); - bzero(&ifr, sizeof(ifr)); - strcpy(ifr.ifr_name, name); - rc = -sock_ioctl (so, SIOCGIFFLAGS, &ifr); - - if (rc != 0) { - CERROR("Can't get flags for interface %s\n", name); - goto out; - } - - if ((ifr.ifr_flags & IFF_UP) == 0) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - goto out; - } - - *up = 1; - - bzero(&ifr, sizeof(ifr)); - strcpy(ifr.ifr_name, name); - *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); - rc = -sock_ioctl(so, SIOCGIFADDR, &ifr); - - if (rc != 0) { - CERROR("Can't get IP address for interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in 
*)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - bzero(&ifr, sizeof(ifr)); - strcpy(ifr.ifr_name, name); - *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); - rc = -sock_ioctl(so, SIOCGIFNETMASK, &ifr); - - if (rc != 0) { - CERROR("Can't get netmask for interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *mask = ntohl(val); -out: - sock_close(so); - return rc; -} - -int -libcfs_ipif_enumerate (char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int toobig; - int nalloc; - int nfound; - socket_t so; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - - rc = -sock_socket(PF_INET, SOCK_STREAM, 0, - NULL, NULL, &so); - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (rc); - } - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - for (;;) { - if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { - toobig = 1; - nalloc = CFS_PAGE_SIZE/sizeof(*ifr); - CWARN("Too many interfaces: only enumerating first %d\n", - nalloc); - } - - LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); - if (ifr == NULL) { - CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - -#if 1 - /* - * XXX Liang: - * sock_ioctl(..., SIOCGIFCONF, ...) is not supposed to be used in - * kernel space because it always try to copy result to userspace. - * So we can't get interfaces name by sock_ioctl(...,SIOCGIFCONF,...). - * I've created a bug for Apple, let's wait... 
- */ - nfound = 0; - for (i = 0; i < 16; i++) { - struct ifreq en; - bzero(&en, sizeof(en)); - snprintf(en.ifr_name, IFNAMSIZ, "en%d", i); - rc = -sock_ioctl (so, SIOCGIFFLAGS, &en); - if (rc != 0) - continue; - strcpy(ifr[nfound++].ifr_name, en.ifr_name); - } - -#else /* NOT in using now */ - rc = -sock_ioctl(so, SIOCGIFCONF, (caddr_t)&ifc); - - if (rc < 0) { - CERROR ("Error %d enumerating interfaces\n", rc); - goto out1; - } - - nfound = ifc.ifc_len/sizeof(*ifr); - LASSERT (nfound <= nalloc); -#endif - - if (nfound < nalloc || toobig) - break; - - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - nalloc *= 2; - } - if (nfound == 0) - goto out1; - - LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { - rc = -ENOMEM; - goto out1; - } - /* NULL out all names[i] */ - memset (names, 0, nfound * sizeof(*names)); - - for (i = 0; i < nfound; i++) { - - nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - - LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (names[i] == NULL) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; - } - - *namesp = names; - rc = nfound; - -out2: - if (rc < 0) - libcfs_ipif_free_enumeration(names, nfound); -out1: - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); -out0: - sock_close(so); - return rc; - -} - -/* - * Public entry of socket upcall. - * - * so_upcall can only be installed while create/accept of socket in - * Darwin 8.0, so we setup libcfs_sock_upcall() as upcall for all - * sockets in creat/accept, it will call upcall provided by user - * which can be setup after create/accept of socket. 
- */ -static void libcfs_sock_upcall(socket_t so, void* arg, int waitf) -{ - cfs_socket_t *sock; - - sock = (cfs_socket_t *)arg; - LASSERT(sock->s_magic == CFS_SOCK_MAGIC); - - if ((sock->s_flags & CFS_SOCK_UPCALL) != 0 && sock->s_upcall != NULL) - sock->s_upcall(so, sock->s_upcallarg, waitf); - return; -} - -void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg) -{ - sock->s_upcall = callback; - sock->s_upcallarg = arg; - sock->s_flags |= CFS_SOCK_UPCALL; - return; -} - -void libcfs_sock_reset_cb(cfs_socket_t *sock) -{ - sock->s_flags &= ~CFS_SOCK_UPCALL; - sock->s_upcall = NULL; - sock->s_upcallarg = NULL; - return; -} - -static int -libcfs_sock_create (cfs_socket_t **sockp, int *fatal, - __u32 local_ip, int local_port) -{ - struct sockaddr_in locaddr; - cfs_socket_t *sock; - int option; - int optlen; - int rc; - - /* All errors are fatal except bind failure if the port is in use */ - *fatal = 1; - - sock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO); - if (!sock) { - CERROR("Can't allocate cfs_socket.\n"); - return -ENOMEM; - } - *sockp = sock; - sock->s_magic = CFS_SOCK_MAGIC; - - rc = -sock_socket(PF_INET, SOCK_STREAM, 0, - libcfs_sock_upcall, sock, &C2B_SOCK(sock)); - if (rc != 0) - goto out; - option = 1; - optlen = sizeof(option); - rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, - SO_REUSEADDR, &option, optlen); - if (rc != 0) - goto out; - - /* can't specify a local port without a local IP */ - LASSERT (local_ip == 0 || local_port != 0); - - if (local_ip != 0 || local_port != 0) { - bzero (&locaddr, sizeof (locaddr)); - locaddr.sin_len = sizeof(struct sockaddr_in); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons (local_port); - locaddr.sin_addr.s_addr = (local_ip != 0) ? 
htonl(local_ip) : INADDR_ANY; - rc = -sock_bind(C2B_SOCK(sock), (struct sockaddr *)&locaddr); - if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; - goto out; - } - if (rc != 0) { - CERROR("Error trying to bind to port %d: %d\n", - local_port, rc); - goto out; - } - } - return 0; -out: - if (C2B_SOCK(sock) != NULL) - sock_close(C2B_SOCK(sock)); - FREE(sock, M_TEMP); - return rc; -} - -int -libcfs_sock_listen (cfs_socket_t **sockp, - __u32 local_ip, int local_port, int backlog) -{ - cfs_socket_t *sock; - int fatal; - int rc; - - rc = libcfs_sock_create(&sock, &fatal, local_ip, local_port); - if (rc != 0) { - if (!fatal) - CERROR("Can't create socket: port %d already in use\n", - local_port); - return rc; - - } - rc = -sock_listen(C2B_SOCK(sock), backlog); - if (rc == 0) { - *sockp = sock; - return 0; - } - - if (C2B_SOCK(sock) != NULL) - sock_close(C2B_SOCK(sock)); - FREE(sock, M_TEMP); - return rc; -} - -int -libcfs_sock_accept (cfs_socket_t **newsockp, cfs_socket_t *sock) -{ - cfs_socket_t *newsock; - int rc; - - newsock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO); - if (!newsock) { - CERROR("Can't allocate cfs_socket.\n"); - return -ENOMEM; - } - newsock->s_magic = CFS_SOCK_MAGIC; - /* - * thread will sleep in sock_accept by calling of msleep(), - * it can be interrupted because msleep() use PCATCH as argument. 
- */ - rc = -sock_accept(C2B_SOCK(sock), NULL, 0, 0, - libcfs_sock_upcall, newsock, &C2B_SOCK(newsock)); - if (rc) { - if (C2B_SOCK(newsock) != NULL) - sock_close(C2B_SOCK(newsock)); - FREE(newsock, M_TEMP); - if ((sock->s_flags & CFS_SOCK_DOWN) != 0) - /* shutdown by libcfs_sock_abort_accept(), fake - * error number for lnet_acceptor() */ - rc = -EAGAIN; - return rc; - } - *newsockp = newsock; - return 0; -} - -void -libcfs_sock_abort_accept (cfs_socket_t *sock) -{ - /* - * XXX Liang: - * - * we want to wakeup thread blocked by sock_accept, but we don't - * know the address where thread is sleeping on, so we cannot - * wakeup it directly. - * The thread slept in sock_accept will be waken up while: - * 1. interrupt by signal - * 2. new connection is coming (sonewconn) - * 3. disconnecting of the socket (soisconnected) - * - * Cause we can't send signal to a thread directly(no KPI), so the - * only thing can be done here is disconnect the socket (by - * sock_shutdown() or sth else? ). - * - * Shutdown request of socket with SHUT_WR or SHUT_RDWR will - * be issured to the protocol. 
- * sock_shutdown()->tcp_usr_shutdown()->tcp_usrclosed()-> - * tcp_close()->soisdisconnected(), it will wakeup thread by - * wakeup((caddr_t)&so->so_timeo); - */ - sock->s_flags |= CFS_SOCK_DOWN; - sock_shutdown(C2B_SOCK(sock), SHUT_RDWR); -} - -int -libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout) -{ - size_t rcvlen; - int rc; - cfs_duration_t to = cfs_time_seconds(timeout); - cfs_time_t then; - struct timeval tv; - - LASSERT(nob > 0); - - for (;;) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0, - }; - cfs_duration_usec(to, &tv); - rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO, - &tv, sizeof(tv)); - if (rc != 0) { - CERROR("Can't set socket recv timeout " - "%ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - - then = cfs_time_current(); - rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen); - to -= cfs_time_current() - then; - - if (rc != 0 && rc != -EWOULDBLOCK) - return rc; - if (rcvlen == nob) - return 0; - - if (to <= 0) - return -EAGAIN; - - buffer = ((char *)buffer) + rcvlen; - nob -= rcvlen; - } - return 0; -} - -int -libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout) -{ - size_t sndlen; - int rc; - cfs_duration_t to = cfs_time_seconds(timeout); - cfs_time_t then; - struct timeval tv; - - LASSERT(nob > 0); - - for (;;) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = (timeout == 0) ? 
MSG_DONTWAIT : 0, - }; - - if (timeout != 0) { - cfs_duration_usec(to, &tv); - rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO, - &tv, sizeof(tv)); - if (rc != 0) { - CERROR("Can't set socket send timeout " - "%ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - } - - then = cfs_time_current(); - rc = -sock_send(C2B_SOCK(sock), &msg, - ((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen); - to -= cfs_time_current() - then; - - if (rc != 0 && rc != -EWOULDBLOCK) - return rc; - if (sndlen == nob) - return 0; - - if (to <= 0) - return -EAGAIN; - buffer = ((char *)buffer) + sndlen; - nob -= sndlen; - } - return 0; - -} - -int -libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port) -{ - struct sockaddr_in sin; - int rc; - - if (remote != 0) - /* Get remote address */ - rc = -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); - else - /* Get local address */ - rc = -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); - if (rc != 0) { - CERROR ("Error %d getting sock %s IP/port\n", - rc, remote ? 
"peer" : "local"); - return rc; - } - - if (ip != NULL) - *ip = ntohl (sin.sin_addr.s_addr); - - if (port != NULL) - *port = ntohs (sin.sin_port); - return 0; -} - -int -libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize) -{ - int option; - int rc; - - if (txbufsize != 0) { - option = txbufsize; - rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof (option)); - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - option, rc); - return (rc); - } - } - - if (rxbufsize != 0) { - option = rxbufsize; - rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof (option)); - if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - option, rc); - return (rc); - } - } - return 0; -} - -int -libcfs_sock_getbuf (cfs_socket_t *sock, int *txbufsize, int *rxbufsize) -{ - int option; - int optlen; - int rc; - - if (txbufsize != NULL) { - optlen = sizeof(option); - rc = -sock_getsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF, - (char *)&option, &optlen); - if (rc != 0) { - CERROR ("Can't get send buffer size: %d\n", rc); - return (rc); - } - *txbufsize = option; - } - - if (rxbufsize != NULL) { - optlen = sizeof(option); - rc = -sock_getsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF, - (char *)&option, &optlen); - if (rc != 0) { - CERROR ("Can't get receive buffer size: %d\n", rc); - return (rc); - } - *rxbufsize = option; - } - return 0; -} - -void -libcfs_sock_release (cfs_socket_t *sock) -{ - if (C2B_SOCK(sock) != NULL) { - sock_shutdown(C2B_SOCK(sock), 2); - sock_close(C2B_SOCK(sock)); - } - FREE(sock, M_TEMP); -} - -int -libcfs_sock_connect (cfs_socket_t **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) -{ - cfs_socket_t *sock; - struct sockaddr_in srvaddr; - int rc; - - rc = libcfs_sock_create(&sock, fatal, local_ip, local_port); - if (rc != 0) - return rc; - - bzero(&srvaddr, sizeof(srvaddr)); - srvaddr.sin_len = sizeof(struct sockaddr_in); - 
srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(peer_port); - srvaddr.sin_addr.s_addr = htonl(peer_ip); - - rc = -sock_connect(C2B_SOCK(sock), (struct sockaddr *)&srvaddr, 0); - if (rc == 0) { - *sockp = sock; - return 0; - } - - *fatal = !(rc == -EADDRNOTAVAIL || rc == -EADDRINUSE); - CDEBUG(*fatal ? D_NETERROR : D_NET, - "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - - libcfs_sock_release(sock); - return rc; -} - -#else /* !__DARWIN8__ */ - -/* - * To use bigger buffer for socket: - * 1. Increase nmbclusters (Cannot increased by sysctl because it's ready only, so - * we must patch kernel). - * 2. Increase net.inet.tcp.reass.maxsegments - * 3. Increase net.inet.tcp.sendspace - * 4. Increase net.inet.tcp.recvspace - * 5. Increase kern.ipc.maxsockbuf - */ -#define KSOCK_MAX_BUF (1152*1024) - -int -libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct socket *so; - struct ifreq ifr; - int nob; - int rc; - __u32 val; - CFS_DECL_FUNNEL_DATA; - - CFS_NET_IN; - rc = socreate(PF_INET, &so, SOCK_STREAM, 0); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (-rc); - } - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - rc = -EINVAL; - goto out; - } - - CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); - strcpy(ifr.ifr_name, name); - CFS_NET_IN; - rc = ifioctl(so, SIOCGIFFLAGS, (caddr_t)&ifr, current_proc()); - CFS_NET_EX; - - if (rc != 0) { - CERROR("Can't get flags for interface %s\n", name); - goto out; - } - if ((ifr.ifr_flags & IFF_UP) == 0) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - goto out; - } - - *up = 1; - strcpy(ifr.ifr_name, name); - *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); - CFS_NET_IN; - rc = ifioctl(so, SIOCGIFADDR, (caddr_t)&ifr, current_proc()); - CFS_NET_EX; - - if (rc != 0) { - CERROR("Can't get IP address for 
interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - strcpy(ifr.ifr_name, name); - *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); - CFS_NET_IN; - rc = ifioctl(so, SIOCGIFNETMASK, (caddr_t)&ifr, current_proc()); - CFS_NET_EX; - - if (rc != 0) { - CERROR("Can't get netmask for interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *mask = ntohl(val); -out: - CFS_NET_IN; - soclose(so); - CFS_NET_EX; - return -rc; -} - -int -libcfs_ipif_enumerate (char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int toobig; - int nalloc; - int nfound; - struct socket *so; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - CFS_DECL_FUNNEL_DATA; - - CFS_NET_IN; - rc = socreate(PF_INET, &so, SOCK_STREAM, 0); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (-rc); - } - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - for (;;) { - if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { - toobig = 1; - nalloc = CFS_PAGE_SIZE/sizeof(*ifr); - CWARN("Too many interfaces: only enumerating first %d\n", - nalloc); - } - - LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); - if (ifr == NULL) { - CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - - CFS_NET_IN; - rc = -ifioctl(so, SIOCGIFCONF, (caddr_t)&ifc, current_proc()); - CFS_NET_EX; - - if (rc < 0) { - CERROR ("Error %d enumerating interfaces\n", rc); - goto out1; - } - - nfound = ifc.ifc_len/sizeof(*ifr); - LASSERT (nfound <= nalloc); - - if (nfound < nalloc || toobig) - break; - - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - nalloc *= 2; - } - if (nfound == 0) - goto out1; - - LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { - rc = -ENOMEM; - goto out1; - } - /* NULL 
out all names[i] */ - memset (names, 0, nfound * sizeof(*names)); - - for (i = 0; i < nfound; i++) { - - nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", - nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - - LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (names[i] == NULL) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; - } - - *namesp = names; - rc = nfound; - -out2: - if (rc < 0) - libcfs_ipif_free_enumeration(names, nfound); -out1: - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); -out0: - CFS_NET_IN; - soclose(so); - CFS_NET_EX; - return rc; -} - -static int -libcfs_sock_create (struct socket **sockp, int *fatal, - __u32 local_ip, int local_port) -{ - struct sockaddr_in locaddr; - struct socket *so; - struct sockopt sopt; - int option; - int rc; - CFS_DECL_FUNNEL_DATA; - - *fatal = 1; - CFS_NET_IN; - rc = socreate(PF_INET, &so, SOCK_STREAM, 0); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (-rc); - } - - bzero(&sopt, sizeof sopt); - option = 1; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_REUSEADDR; - sopt.sopt_val = &option; - sopt.sopt_valsize = sizeof(option); - CFS_NET_IN; - rc = sosetopt(so, &sopt); - if (rc != 0) { - CFS_NET_EX; - CERROR ("Can't set sock reuse address: %d\n", rc); - goto out; - } - /* can't specify a local port without a local IP */ - LASSERT (local_ip == 0 || local_port != 0); - - if (local_ip != 0 || local_port != 0) { - bzero (&locaddr, sizeof (locaddr)); - locaddr.sin_len = sizeof(struct sockaddr_in); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons (local_port); - locaddr.sin_addr.s_addr = (local_ip != 0) ? 
htonl(local_ip) : - INADDR_ANY; - - rc = sobind(so, (struct sockaddr *)&locaddr); - if (rc == EADDRINUSE) { - CFS_NET_EX; - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; - goto out; - } - if (rc != 0) { - CFS_NET_EX; - CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n", - HIPQUAD(local_ip), rc); - goto out; - } - } - *sockp = so; - return 0; -out: - CFS_NET_IN; - soclose(so); - CFS_NET_EX; - return -rc; -} - -int -libcfs_sock_listen (struct socket **sockp, - __u32 local_ip, int local_port, int backlog) -{ - int fatal; - int rc; - CFS_DECL_FUNNEL_DATA; - - rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port); - if (rc != 0) { - if (!fatal) - CERROR("Can't create socket: port %d already in use\n", - local_port); - return rc; - } - CFS_NET_IN; - rc = solisten(*sockp, backlog); - CFS_NET_EX; - if (rc == 0) - return 0; - CERROR("Can't set listen backlog %d: %d\n", backlog, rc); - CFS_NET_IN; - soclose(*sockp); - CFS_NET_EX; - return -rc; -} - -int -libcfs_sock_accept (struct socket **newsockp, struct socket *sock) -{ - struct socket *so; - struct sockaddr *sa; - int error, s; - CFS_DECL_FUNNEL_DATA; - - CFS_NET_IN; - s = splnet(); - if ((sock->so_options & SO_ACCEPTCONN) == 0) { - splx(s); - CFS_NET_EX; - return (-EINVAL); - } - - if ((sock->so_state & SS_NBIO) && sock->so_comp.tqh_first == NULL) { - splx(s); - CFS_NET_EX; - return (-EWOULDBLOCK); - } - - error = 0; - while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) { - if (sock->so_state & SS_CANTRCVMORE) { - sock->so_error = ECONNABORTED; - break; - } - error = tsleep((caddr_t)&sock->so_timeo, PSOCK | PCATCH, - "accept", 0); - if (error) { - splx(s); - CFS_NET_EX; - return (-error); - } - } - if (sock->so_error) { - error = sock->so_error; - sock->so_error = 0; - splx(s); - CFS_NET_EX; - return (-error); - } - - /* - * At this point we know that there is at least one connection - * ready to be accepted. 
Remove it from the queue prior to - * allocating the file descriptor for it since falloc() may - * block allowing another process to accept the connection - * instead. - */ - so = TAILQ_FIRST(&sock->so_comp); - TAILQ_REMOVE(&sock->so_comp, so, so_list); - sock->so_qlen--; - - so->so_state &= ~SS_COMP; - so->so_head = NULL; - sa = 0; - (void) soaccept(so, &sa); - - *newsockp = so; - FREE(sa, M_SONAME); - splx(s); - CFS_NET_EX; - return (-error); -} - -void -libcfs_sock_abort_accept (struct socket *sock) -{ - wakeup(&sock->so_timeo); -} - -/* - * XXX Liang: timeout for write is not supported yet. - */ -int -libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - CFS_DECL_NET_DATA; - - while (nob > 0) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct uio suio = { - .uio_iov = &iov, - .uio_iovcnt = 1, - .uio_offset = 0, - .uio_resid = nob, - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_WRITE, - .uio_procp = NULL - }; - - CFS_NET_IN; - rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0); - CFS_NET_EX; - - if (rc != 0) { - if ( suio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\ - rc == EWOULDBLOCK)) - rc = 0; - if ( rc != 0 ) - return -rc; - rc = nob - suio.uio_resid; - buffer = ((char *)buffer) + rc; - nob = suio.uio_resid; - continue; - } - break; - } - return (0); -} - -/* - * XXX Liang: timeout for read is not supported yet. 
- */ -int -libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - CFS_DECL_NET_DATA; - - while (nob > 0) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct uio ruio = { - .uio_iov = &iov, - .uio_iovcnt = 1, - .uio_offset = 0, - .uio_resid = nob, - .uio_segflg = UIO_SYSSPACE, - .uio_rw = UIO_READ, - .uio_procp = NULL - }; - - CFS_NET_IN; - rc = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0); - CFS_NET_EX; - - if (rc != 0) { - if ( ruio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\ - rc == EWOULDBLOCK)) - rc = 0; - if (rc != 0) - return -rc; - rc = nob - ruio.uio_resid; - buffer = ((char *)buffer) + rc; - nob = ruio.uio_resid; - continue; - } - break; - } - return (0); -} - -int -libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize) -{ - struct sockopt sopt; - int rc = 0; - int option; - CFS_DECL_NET_DATA; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_val = &option; - sopt.sopt_valsize = sizeof(option); - - if (txbufsize != 0) { - option = txbufsize; - if (option > KSOCK_MAX_BUF) - option = KSOCK_MAX_BUF; - - sopt.sopt_name = SO_SNDBUF; - CFS_NET_IN; - rc = sosetopt(sock, &sopt); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - option, rc); - - return -rc; - } - } - - if (rxbufsize != 0) { - option = rxbufsize; - sopt.sopt_name = SO_RCVBUF; - CFS_NET_IN; - rc = sosetopt(sock, &sopt); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - option, rc); - return -rc; - } - } - return 0; -} - -int -libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port) -{ - struct sockaddr_in *sin; - struct sockaddr *sa = NULL; - int rc; - CFS_DECL_NET_DATA; - - if (remote != 0) { - CFS_NET_IN; - rc = sock->so_proto->pr_usrreqs->pru_peeraddr(sock, &sa); - CFS_NET_EX; - - if (rc != 0) { - if (sa) FREE(sa, M_SONAME); - CERROR 
("Error %d getting sock peer IP\n", rc); - return -rc; - } - } else { - CFS_NET_IN; - rc = sock->so_proto->pr_usrreqs->pru_sockaddr(sock, &sa); - CFS_NET_EX; - if (rc != 0) { - if (sa) FREE(sa, M_SONAME); - CERROR ("Error %d getting sock local IP\n", rc); - return -rc; - } - } - if (sa != NULL) { - sin = (struct sockaddr_in *)sa; - if (ip != NULL) - *ip = ntohl (sin->sin_addr.s_addr); - if (port != NULL) - *port = ntohs (sin->sin_port); - if (sa) - FREE(sa, M_SONAME); - } - return 0; -} - -int -libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize) -{ - struct sockopt sopt; - int rc; - CFS_DECL_NET_DATA; - - bzero(&sopt, sizeof sopt); - sopt.sopt_dir = SOPT_GET; - sopt.sopt_level = SOL_SOCKET; - - if (txbufsize != NULL) { - sopt.sopt_val = txbufsize; - sopt.sopt_valsize = sizeof(*txbufsize); - sopt.sopt_name = SO_SNDBUF; - CFS_NET_IN; - rc = sogetopt(sock, &sopt); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't get send buffer size: %d\n", rc); - return -rc; - } - } - - if (rxbufsize != NULL) { - sopt.sopt_val = rxbufsize; - sopt.sopt_valsize = sizeof(*rxbufsize); - sopt.sopt_name = SO_RCVBUF; - CFS_NET_IN; - rc = sogetopt(sock, &sopt); - CFS_NET_EX; - if (rc != 0) { - CERROR ("Can't get receive buffer size: %d\n", rc); - return -rc; - } - } - return 0; -} - -int -libcfs_sock_connect (struct socket **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) -{ - struct sockaddr_in srvaddr; - struct socket *so; - int s; - int rc; - CFS_DECL_FUNNEL_DATA; - - rc = libcfs_sock_create(sockp, fatal, local_ip, local_port); - if (rc != 0) - return rc; - so = *sockp; - bzero(&srvaddr, sizeof(srvaddr)); - srvaddr.sin_len = sizeof(struct sockaddr_in); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons (peer_port); - srvaddr.sin_addr.s_addr = htonl (peer_ip); - - CFS_NET_IN; - rc = soconnect(so, (struct sockaddr *)&srvaddr); - if (rc != 0) { - CFS_NET_EX; - if (rc != EADDRNOTAVAIL && rc != EADDRINUSE) - CDEBUG(D_NETERROR, - 
"Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - goto out; - } - s = splnet(); - while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { - CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n"); - (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz); - } - if ((rc = so->so_error) != 0) { - so->so_error = 0; - splx(s); - CFS_NET_EX; - CDEBUG(D_NETERROR, - "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - goto out; - } - LASSERT(so->so_state & SS_ISCONNECTED); - splx(s); - CFS_NET_EX; - if (sockp) - *sockp = so; - return (0); -out: - CFS_NET_IN; - soshutdown(so, 2); - soclose(so); - CFS_NET_EX; - return (-rc); -} - -void -libcfs_sock_release (struct socket *sock) -{ - CFS_DECL_FUNNEL_DATA; - CFS_NET_IN; - soshutdown(sock, 0); - CFS_NET_EX; -} - -#endif diff --git a/lnet/libcfs/darwin/darwin-tracefile.c b/lnet/libcfs/darwin/darwin-tracefile.c deleted file mode 100644 index bb1dc7297514ca91e7d2b974020c1430d1a75e5a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-tracefile.c +++ /dev/null @@ -1,283 +0,0 @@ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -/* - * We can't support smp tracefile currently. - * Everything is put on one cpu. - */ - -#define M_TCD_MAX_PAGES (128 * 1280) -extern union trace_data_union trace_data[NR_CPUS]; -extern char *tracefile; -extern long long tracefile_size; -extern int trace_start_thread(void); -extern void trace_stop_thread(void); - -long max_debug_mb = M_TCD_MAX_PAGES; -static long max_permit_mb = (64 * 1024); - -spinlock_t trace_cpu_serializer; - -/* - * thread currently executing tracefile code or NULL if none does. Used to - * detect recursive calls to libcfs_debug_msg(). 
- */ -static thread_t trace_owner = NULL; - -extern int get_preemption_level(void); -extern atomic_t tage_allocated; - -struct rw_semaphore tracefile_sem; - -int tracefile_init_arch() { - init_rwsem(&tracefile_sem); -#error "Todo: initialise per-cpu console buffers" - return 0; -} - -void tracefile_fini_arch() { -} - -void tracefile_read_lock() { - down_read(&tracefile_sem); -} - -void tracefile_read_unlock() { - up_read(&tracefile_sem); -} - -void tracefile_write_lock() { - down_write(&tracefile_sem); -} - -void tracefile_write_unlock() { - up_write(&tracefile_sem); -} - -char *trace_get_console_buffer(void) -{ -#error "todo: return a per-cpu/interrupt console buffer and disable pre-emption" -} - -void trace_put_console_buffer(char *buffer) -{ -#error "todo: re-enable pre-emption" -} - -struct trace_cpu_data *trace_get_tcd(void) -{ - struct trace_cpu_data *tcd; - int nr_pages; - struct list_head pages; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - /* - * debugging check for recursive call to libcfs_debug_msg() - */ - if (trace_owner == current_thread()) { - /* - * Cannot assert here. - */ - printk(KERN_EMERG "recursive call to %s", __FUNCTION__); - /* - * "The death of God left the angels in a strange position." - */ - cfs_enter_debugger(); - } - tcd = &trace_data[0].tcd; - CFS_INIT_LIST_HEAD(&pages); - if (get_preemption_level() == 0) - nr_pages = trace_refill_stock(tcd, CFS_ALLOC_STD, &pages); - else - nr_pages = 0; - spin_lock(&trace_cpu_serializer); - trace_owner = current_thread(); - tcd->tcd_cur_stock_pages += nr_pages; - list_splice(&pages, &tcd->tcd_stock_pages); - return tcd; -} - -extern void raw_page_death_row_clean(void); - -void __trace_put_tcd(struct trace_cpu_data *tcd) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - LASSERT(trace_owner == current_thread()); - trace_owner = NULL; - spin_unlock(&trace_cpu_serializer); - if (get_preemption_level() == 0) - /* purge all pending pages */ - raw_page_death_row_clean(); -} - -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) -{ - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - /* XNU has global tcd, and all pages are owned by it */ - return 1; -} - -void -set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, - const int line, unsigned long stack) -{ - struct timeval tv; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - do_gettimeofday(&tv); - header->ph_subsys = subsys; - header->ph_mask = mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_sec = (__u32)tv.tv_sec; - header->ph_usec = tv.tv_usec; - header->ph_stack = stack; - header->ph_pid = cfs_curproc_pid(); - header->ph_line_num = line; - header->ph_extern_pid = (__u32)current_thread(); -} - -void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn) -{ - char *prefix = "Lustre", *ptype = KERN_INFO; - - /* - * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - if ((mask & D_EMERG) != 0) { - prefix = "LustreError"; - ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { - prefix = "LustreError"; - ptype = KERN_ERR; - } else if ((mask & D_WARNING) != 0) { - prefix = "Lustre"; - ptype = KERN_WARNING; - } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) { - prefix = "Lustre"; - ptype = KERN_INFO; - } - - if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %.*s", ptype, prefix, len, buf); - } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %*s", - ptype, prefix, hdr->ph_pid, hdr->ph_extern_pid, - file, hdr->ph_line_num, fn, len, buf); - } -} - -/* - * Sysctl handle of libcfs - */ -#define MAX_TRACEFILE_PATH_LEN 256 -int cfs_trace_daemon SYSCTL_HANDLER_ARGS -{ - int error = 0; - char *name = NULL; - - if (req->newptr == USER_ADDR_NULL) { - /* a read */ - if (tracefile) - error = sysctl_handle_string(oidp, tracefile, 0, req); - else - error = sysctl_handle_string(oidp, "NA", 0, req); - - return error; - } - - /* now hanle write requests */ - MALLOC(name, char *, MAX_TRACEFILE_PATH_LEN + 1, M_TEMP, M_WAITOK | M_ZERO); - if (name == NULL) - return -ENOMEM; - name[0] = '\0'; - tracefile_write_lock(); - error = sysctl_handle_string(oidp, name, MAX_TRACEFILE_PATH_LEN + 1, req); - if (!error) { - if (strcmp(name, "stop") == 0) { - /* stop tracefile daemon */ - tracefile = NULL; - trace_stop_thread(); - goto out; - }else if (strncmp(name, "size=", 5) == 0) { - tracefile_size = simple_strtoul(name + 5, NULL, 0); - if (tracefile_size < 10 || tracefile_size > 20480) - tracefile_size = TRACEFILE_SIZE; - else - tracefile_size <<= 20; - goto out; - - } - if (name[0] != '/') { - error = -EINVAL; - goto out; - } - if (tracefile != NULL) - cfs_free(tracefile); - tracefile = name; - name = NULL; - trace_start_thread(); - } else { - /* Something was wrong with the write request */ - printf("sysctl debug daemon failed: %d.\n", error); - goto out; - } -out: - if (name != NULL) - FREE(name, M_TEMP); - tracefile_write_unlock(); - return 
error; -} -#undef MAX_TRACEFILE_PATH_LEN - - -int cfs_debug_mb SYSCTL_HANDLER_ARGS -{ - int i; - int error = 0; - - error = sysctl_handle_long(oidp, oidp->oid_arg1, oidp->oid_arg2, req); - if (!error && req->newptr != USER_ADDR_NULL) { - /* We have a new value stored in the standard location */ - if (max_debug_mb <= 0) - return -EINVAL; - if (max_debug_mb > max_permit_mb) { - printf("sysctl debug_mb is too big: %d.\n", max_debug_mb); - return 0; - } - for (i = 0; i < NR_CPUS; i++) { - struct trace_cpu_data *tcd; - tcd = &trace_data[i].tcd; - tcd->tcd_max_pages = max_debug_mb; - } - } else if (req->newptr != USER_ADDR_NULL) { - /* Something was wrong with the write request */ - printf ("sysctl debug_mb fault: %d.\n", error); - } - - return error; -} - -void -trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) -{ -#error "tbd" -} diff --git a/lnet/libcfs/darwin/darwin-utils.c b/lnet/libcfs/darwin/darwin-utils.c deleted file mode 100644 index cfd7a2d6a8b533bd9005a766094df8ae1f9f091e..0000000000000000000000000000000000000000 --- a/lnet/libcfs/darwin/darwin-utils.c +++ /dev/null @@ -1,578 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Darwin porting library - * Make things easy to port - */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <mach/mach_types.h> -#include <string.h> -#include <sys/errno.h> -#include <sys/types.h> -#include <sys/fcntl.h> -#include <lnet/types.h> - -#include <libcfs/kp30.h> - -#ifndef isspace -inline int -isspace(char c) -{ - return (c == ' ' || c == '\t' || c == '\n' || c == '\12'); -} -#endif - -char * strpbrk(const char * cs,const char * ct) -{ - const char *sc1,*sc2; - - for( sc1 = cs; *sc1 != '\0'; ++sc1) { - for( sc2 = ct; *sc2 != '\0'; ++sc2) { - if (*sc1 == *sc2) - return (char *) sc1; - } - } - return NULL; -} - -char * strsep(char **s, const char *ct) -{ - char *sbegin = *s, *end; - - if (sbegin == NULL) - return NULL; - end = strpbrk(sbegin, ct); - if (end != NULL) - *end++ = '\0'; - *s = end; - - return sbegin; -} - -size_t strnlen(const char * s, size_t count) -{ - const char *sc; - - for (sc = s; count-- && *sc != '\0'; ++sc) - /* nothing */; - return sc - s; -} - -char * -strstr(const char *in, const char *str) -{ - char c; - size_t len; - - c = *str++; - if (!c) - return (char *) in; // Trivial empty string case - len = strlen(str); - do { - char sc; - do { - sc = *in++; - if (!sc) - return (char *) 0; - } while (sc != c); - } while (strncmp(in, str, len) != 0); - return (char *) (in - 1); -} - -char * -strrchr(const char *p, int ch) -{ - const char *end = p + strlen(p); - do { - if (*end == (char)ch) - return (char *)end; - } while (--end >= p); - return NULL; -} - -char * -ul2dstr(unsigned long address, char *buf, int len) -{ - char *pos = buf + len - 1; - - if (len <= 0 || !buf) - return NULL; - *pos = 0; - while (address) { - if (!--len) break; - *--pos = address % 10 + '0'; - address /= 10; - } - return pos; -} - -/* - * miscellaneous libcfs stuff - */ - 
-/* - * Convert server error code to client format. - * Linux errno.h. - */ - -/* obtained by - * - * cc /usr/include/asm/errno.h -E -dM | grep '#define E' | sort -n -k3,3 - * - */ -enum linux_errnos { - LINUX_EPERM = 1, - LINUX_ENOENT = 2, - LINUX_ESRCH = 3, - LINUX_EINTR = 4, - LINUX_EIO = 5, - LINUX_ENXIO = 6, - LINUX_E2BIG = 7, - LINUX_ENOEXEC = 8, - LINUX_EBADF = 9, - LINUX_ECHILD = 10, - LINUX_EAGAIN = 11, - LINUX_ENOMEM = 12, - LINUX_EACCES = 13, - LINUX_EFAULT = 14, - LINUX_ENOTBLK = 15, - LINUX_EBUSY = 16, - LINUX_EEXIST = 17, - LINUX_EXDEV = 18, - LINUX_ENODEV = 19, - LINUX_ENOTDIR = 20, - LINUX_EISDIR = 21, - LINUX_EINVAL = 22, - LINUX_ENFILE = 23, - LINUX_EMFILE = 24, - LINUX_ENOTTY = 25, - LINUX_ETXTBSY = 26, - LINUX_EFBIG = 27, - LINUX_ENOSPC = 28, - LINUX_ESPIPE = 29, - LINUX_EROFS = 30, - LINUX_EMLINK = 31, - LINUX_EPIPE = 32, - LINUX_EDOM = 33, - LINUX_ERANGE = 34, - LINUX_EDEADLK = 35, - LINUX_ENAMETOOLONG = 36, - LINUX_ENOLCK = 37, - LINUX_ENOSYS = 38, - LINUX_ENOTEMPTY = 39, - LINUX_ELOOP = 40, - LINUX_ENOMSG = 42, - LINUX_EIDRM = 43, - LINUX_ECHRNG = 44, - LINUX_EL2NSYNC = 45, - LINUX_EL3HLT = 46, - LINUX_EL3RST = 47, - LINUX_ELNRNG = 48, - LINUX_EUNATCH = 49, - LINUX_ENOCSI = 50, - LINUX_EL2HLT = 51, - LINUX_EBADE = 52, - LINUX_EBADR = 53, - LINUX_EXFULL = 54, - LINUX_ENOANO = 55, - LINUX_EBADRQC = 56, - LINUX_EBADSLT = 57, - LINUX_EBFONT = 59, - LINUX_ENOSTR = 60, - LINUX_ENODATA = 61, - LINUX_ETIME = 62, - LINUX_ENOSR = 63, - LINUX_ENONET = 64, - LINUX_ENOPKG = 65, - LINUX_EREMOTE = 66, - LINUX_ENOLINK = 67, - LINUX_EADV = 68, - LINUX_ESRMNT = 69, - LINUX_ECOMM = 70, - LINUX_EPROTO = 71, - LINUX_EMULTIHOP = 72, - LINUX_EDOTDOT = 73, - LINUX_EBADMSG = 74, - LINUX_EOVERFLOW = 75, - LINUX_ENOTUNIQ = 76, - LINUX_EBADFD = 77, - LINUX_EREMCHG = 78, - LINUX_ELIBACC = 79, - LINUX_ELIBBAD = 80, - LINUX_ELIBSCN = 81, - LINUX_ELIBMAX = 82, - LINUX_ELIBEXEC = 83, - LINUX_EILSEQ = 84, - LINUX_ERESTART = 85, - LINUX_ESTRPIPE = 86, - LINUX_EUSERS = 87, - 
LINUX_ENOTSOCK = 88, - LINUX_EDESTADDRREQ = 89, - LINUX_EMSGSIZE = 90, - LINUX_EPROTOTYPE = 91, - LINUX_ENOPROTOOPT = 92, - LINUX_EPROTONOSUPPORT = 93, - LINUX_ESOCKTNOSUPPORT = 94, - LINUX_EOPNOTSUPP = 95, - LINUX_EPFNOSUPPORT = 96, - LINUX_EAFNOSUPPORT = 97, - LINUX_EADDRINUSE = 98, - LINUX_EADDRNOTAVAIL = 99, - LINUX_ENETDOWN = 100, - LINUX_ENETUNREACH = 101, - LINUX_ENETRESET = 102, - LINUX_ECONNABORTED = 103, - LINUX_ECONNRESET = 104, - LINUX_ENOBUFS = 105, - LINUX_EISCONN = 106, - LINUX_ENOTCONN = 107, - LINUX_ESHUTDOWN = 108, - LINUX_ETOOMANYREFS = 109, - LINUX_ETIMEDOUT = 110, - LINUX_ECONNREFUSED = 111, - LINUX_EHOSTDOWN = 112, - LINUX_EHOSTUNREACH = 113, - LINUX_EALREADY = 114, - LINUX_EINPROGRESS = 115, - LINUX_ESTALE = 116, - LINUX_EUCLEAN = 117, - LINUX_ENOTNAM = 118, - LINUX_ENAVAIL = 119, - LINUX_EISNAM = 120, - LINUX_EREMOTEIO = 121, - LINUX_EDQUOT = 122, - LINUX_ENOMEDIUM = 123, - LINUX_EMEDIUMTYPE = 124, - - /* - * we don't need these, but for completeness.. - */ - LINUX_EDEADLOCK = LINUX_EDEADLK, - LINUX_EWOULDBLOCK = LINUX_EAGAIN -}; - -int convert_server_error(__u64 ecode) -{ - int sign; - int code; - - static int errno_xlate[] = { - /* success is always success */ - [0] = 0, - [LINUX_EPERM] = EPERM, - [LINUX_ENOENT] = ENOENT, - [LINUX_ESRCH] = ESRCH, - [LINUX_EINTR] = EINTR, - [LINUX_EIO] = EIO, - [LINUX_ENXIO] = ENXIO, - [LINUX_E2BIG] = E2BIG, - [LINUX_ENOEXEC] = ENOEXEC, - [LINUX_EBADF] = EBADF, - [LINUX_ECHILD] = ECHILD, - [LINUX_EAGAIN] = EAGAIN, - [LINUX_ENOMEM] = ENOMEM, - [LINUX_EACCES] = EACCES, - [LINUX_EFAULT] = EFAULT, - [LINUX_ENOTBLK] = ENOTBLK, - [LINUX_EBUSY] = EBUSY, - [LINUX_EEXIST] = EEXIST, - [LINUX_EXDEV] = EXDEV, - [LINUX_ENODEV] = ENODEV, - [LINUX_ENOTDIR] = ENOTDIR, - [LINUX_EISDIR] = EISDIR, - [LINUX_EINVAL] = EINVAL, - [LINUX_ENFILE] = ENFILE, - [LINUX_EMFILE] = EMFILE, - [LINUX_ENOTTY] = ENOTTY, - [LINUX_ETXTBSY] = ETXTBSY, - [LINUX_EFBIG] = EFBIG, - [LINUX_ENOSPC] = ENOSPC, - [LINUX_ESPIPE] = ESPIPE, - [LINUX_EROFS] 
= EROFS, - [LINUX_EMLINK] = EMLINK, - [LINUX_EPIPE] = EPIPE, - [LINUX_EDOM] = EDOM, - [LINUX_ERANGE] = ERANGE, - [LINUX_EDEADLK] = EDEADLK, - [LINUX_ENAMETOOLONG] = ENAMETOOLONG, - [LINUX_ENOLCK] = ENOLCK, - [LINUX_ENOSYS] = ENOSYS, - [LINUX_ENOTEMPTY] = ENOTEMPTY, - [LINUX_ELOOP] = ELOOP, - [LINUX_ENOMSG] = ENOMSG, - [LINUX_EIDRM] = EIDRM, - [LINUX_ECHRNG] = EINVAL /* ECHRNG */, - [LINUX_EL2NSYNC] = EINVAL /* EL2NSYNC */, - [LINUX_EL3HLT] = EINVAL /* EL3HLT */, - [LINUX_EL3RST] = EINVAL /* EL3RST */, - [LINUX_ELNRNG] = EINVAL /* ELNRNG */, - [LINUX_EUNATCH] = EINVAL /* EUNATCH */, - [LINUX_ENOCSI] = EINVAL /* ENOCSI */, - [LINUX_EL2HLT] = EINVAL /* EL2HLT */, - [LINUX_EBADE] = EINVAL /* EBADE */, - [LINUX_EBADR] = EBADRPC, - [LINUX_EXFULL] = EINVAL /* EXFULL */, - [LINUX_ENOANO] = EINVAL /* ENOANO */, - [LINUX_EBADRQC] = EINVAL /* EBADRQC */, - [LINUX_EBADSLT] = EINVAL /* EBADSLT */, - [LINUX_EBFONT] = EINVAL /* EBFONT */, - [LINUX_ENOSTR] = EINVAL /* ENOSTR */, - [LINUX_ENODATA] = EINVAL /* ENODATA */, - [LINUX_ETIME] = EINVAL /* ETIME */, - [LINUX_ENOSR] = EINVAL /* ENOSR */, - [LINUX_ENONET] = EINVAL /* ENONET */, - [LINUX_ENOPKG] = EINVAL /* ENOPKG */, - [LINUX_EREMOTE] = EREMOTE, - [LINUX_ENOLINK] = EINVAL /* ENOLINK */, - [LINUX_EADV] = EINVAL /* EADV */, - [LINUX_ESRMNT] = EINVAL /* ESRMNT */, - [LINUX_ECOMM] = EINVAL /* ECOMM */, - [LINUX_EPROTO] = EPROTOTYPE, - [LINUX_EMULTIHOP] = EINVAL /* EMULTIHOP */, - [LINUX_EDOTDOT] = EINVAL /* EDOTDOT */, - [LINUX_EBADMSG] = EINVAL /* EBADMSG */, - [LINUX_EOVERFLOW] = EOVERFLOW, - [LINUX_ENOTUNIQ] = EINVAL /* ENOTUNIQ */, - [LINUX_EBADFD] = EINVAL /* EBADFD */, - [LINUX_EREMCHG] = EINVAL /* EREMCHG */, - [LINUX_ELIBACC] = EINVAL /* ELIBACC */, - [LINUX_ELIBBAD] = EINVAL /* ELIBBAD */, - [LINUX_ELIBSCN] = EINVAL /* ELIBSCN */, - [LINUX_ELIBMAX] = EINVAL /* ELIBMAX */, - [LINUX_ELIBEXEC] = EINVAL /* ELIBEXEC */, - [LINUX_EILSEQ] = EILSEQ, - [LINUX_ERESTART] = EINVAL /* because ERESTART is - * negative in XNU */, - 
[LINUX_ESTRPIPE] = EINVAL /* ESTRPIPE */, - [LINUX_EUSERS] = EUSERS, - [LINUX_ENOTSOCK] = ENOTSOCK, - [LINUX_EDESTADDRREQ] = EDESTADDRREQ, - [LINUX_EMSGSIZE] = EMSGSIZE, - [LINUX_EPROTOTYPE] = EPROTOTYPE, - [LINUX_ENOPROTOOPT] = ENOPROTOOPT, - [LINUX_EPROTONOSUPPORT] = EPROTONOSUPPORT, - [LINUX_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT, - [LINUX_EOPNOTSUPP] = EOPNOTSUPP, - [LINUX_EPFNOSUPPORT] = EPFNOSUPPORT, - [LINUX_EAFNOSUPPORT] = EAFNOSUPPORT, - [LINUX_EADDRINUSE] = EADDRINUSE, - [LINUX_EADDRNOTAVAIL] = EADDRNOTAVAIL, - [LINUX_ENETDOWN] = ENETDOWN, - [LINUX_ENETUNREACH] = ENETUNREACH, - [LINUX_ENETRESET] = ENETRESET, - [LINUX_ECONNABORTED] = ECONNABORTED, - [LINUX_ECONNRESET] = ECONNRESET, - [LINUX_ENOBUFS] = ENOBUFS, - [LINUX_EISCONN] = EISCONN, - [LINUX_ENOTCONN] = ENOTCONN, - [LINUX_ESHUTDOWN] = ESHUTDOWN, - [LINUX_ETOOMANYREFS] = ETOOMANYREFS, - [LINUX_ETIMEDOUT] = ETIMEDOUT, - [LINUX_ECONNREFUSED] = ECONNREFUSED, - [LINUX_EHOSTDOWN] = EHOSTDOWN, - [LINUX_EHOSTUNREACH] = EHOSTUNREACH, - [LINUX_EALREADY] = EALREADY, - [LINUX_EINPROGRESS] = EINPROGRESS, - [LINUX_ESTALE] = ESTALE, - [LINUX_EUCLEAN] = EINVAL /* EUCLEAN */, - [LINUX_ENOTNAM] = EINVAL /* ENOTNAM */, - [LINUX_ENAVAIL] = EINVAL /* ENAVAIL */, - [LINUX_EISNAM] = EINVAL /* EISNAM */, - [LINUX_EREMOTEIO] = EINVAL /* EREMOTEIO */, - [LINUX_EDQUOT] = EDQUOT, - [LINUX_ENOMEDIUM] = EINVAL /* ENOMEDIUM */, - [LINUX_EMEDIUMTYPE] = EINVAL /* EMEDIUMTYPE */, - }; - code = (int)ecode; - if (code >= 0) { - sign = +1; - } else { - sign = -1; - code = -code; - } - if (code < (sizeof errno_xlate) / (sizeof errno_xlate[0])) { - code = errno_xlate[code]; - LASSERT(code >= 0); - } - return sign * code; -} - -enum { - LINUX_O_RDONLY = 00, - LINUX_O_WRONLY = 01, - LINUX_O_RDWR = 02, - LINUX_O_CREAT = 0100, - LINUX_O_EXCL = 0200, - LINUX_O_NOCTTY = 0400, - LINUX_O_TRUNC = 01000, - LINUX_O_APPEND = 02000, - LINUX_O_NONBLOCK = 04000, - LINUX_O_NDELAY = LINUX_O_NONBLOCK, - LINUX_O_SYNC = 010000, - LINUX_O_FSYNC = LINUX_O_SYNC, - 
LINUX_O_ASYNC = 020000, - LINUX_O_DIRECT = 040000, - LINUX_O_NOFOLLOW = 0400000 -}; - -static inline void obit_convert(int *cflag, int *sflag, - unsigned cmask, unsigned smask) -{ - if (*cflag & cmask != 0) { - *sflag |= smask; - *cflag &= ~cmask; - } -} - -/* - * convert <fcntl.h> flag from XNU client to Linux _i386_ server. - */ -int convert_client_oflag(int cflag, int *result) -{ - int sflag = 0; - - cflag = 0; - obit_convert(&cflag, &sflag, O_RDONLY, LINUX_O_RDONLY); - obit_convert(&cflag, &sflag, O_WRONLY, LINUX_O_WRONLY); - obit_convert(&cflag, &sflag, O_RDWR, LINUX_O_RDWR); - obit_convert(&cflag, &sflag, O_NONBLOCK, LINUX_O_NONBLOCK); - obit_convert(&cflag, &sflag, O_APPEND, LINUX_O_APPEND); - obit_convert(&cflag, &sflag, O_ASYNC, LINUX_O_ASYNC); - obit_convert(&cflag, &sflag, O_FSYNC, LINUX_O_FSYNC); - obit_convert(&cflag, &sflag, O_NOFOLLOW, LINUX_O_NOFOLLOW); - obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT); - obit_convert(&cflag, &sflag, O_TRUNC, LINUX_O_TRUNC); - obit_convert(&cflag, &sflag, O_EXCL, LINUX_O_EXCL); - obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT); - obit_convert(&cflag, &sflag, O_NDELAY, LINUX_O_NDELAY); - obit_convert(&cflag, &sflag, O_NOCTTY, LINUX_O_NOCTTY); - /* - * Some more obscure BSD flags have no Linux counterparts: - * - * O_SHLOCK 0x0010 - * O_EXLOCK 0x0020 - * O_EVTONLY 0x8000 - * O_POPUP 0x80000000 - * O_ALERT 0x20000000 - */ - if (cflag == 0) { - *result = sflag; - return 0; - } else - return -EINVAL; -} - -#ifdef __DARWIN8__ -#else /* !__DARWIN8__ */ -extern int unix_syscall(); -extern int unix_syscall_return(); - -extern int ktrsysret(); -extern int ktrace(); - -extern int ast_taken(); -extern int ast_check(); - -extern int trap(); -extern int syscall_trace(); - -static int is_addr_in_range(void *addr, void *start, void *end) -{ - return start <= addr && addr <= end; -} - -extern void cfs_thread_agent (void); - -static int is_last_frame(void *addr) -{ - if (addr == NULL) - return 1; - else if 
(is_addr_in_range(addr, unix_syscall, unix_syscall_return)) - return 1; - else if (is_addr_in_range(addr, ktrsysret, ktrace)) - return 1; - else if (is_addr_in_range(addr, ast_taken, ast_check)) - return 1; - else if (is_addr_in_range(addr, trap, syscall_trace)) - return 1; - else if (is_addr_in_range(addr, cfs_thread_agent, cfs_kernel_thread)) - return 1; - else - return 0; -} - -static void *get_frame(int i) -{ - void *result; - -#define CASE(i) case (i): result = __builtin_return_address(i); break - switch (i + 1) { - CASE(1); - CASE(2); - CASE(3); - CASE(4); - CASE(5); - CASE(6); - CASE(7); - CASE(8); - CASE(9); - CASE(10); - CASE(11); - CASE(12); - CASE(13); - CASE(14); - CASE(15); - CASE(16); - CASE(17); - CASE(18); - CASE(19); - CASE(20); - default: - panic("impossible frame number: %d\n", i); - result = NULL; - } - return result; -} - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{ - int i; - - memset(trace, 0, sizeof *trace); - for (i = 0; i < sizeof_array(trace->frame); ++ i) { - void *addr; - - addr = get_frame(i); - trace->frame[i] = addr; - if (is_last_frame(addr)) - break; - } -} - -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - if (0 <= frame_no && frame_no < sizeof_array(trace->frame)) - return trace->frame[frame_no]; - else - return NULL; -} -#endif /* !__DARWIN8__ */ diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c deleted file mode 100644 index 5de53f413a670a9903dd82fbab126ce9c3808774..0000000000000000000000000000000000000000 --- a/lnet/libcfs/debug.c +++ /dev/null @@ -1,725 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include "tracefile.h" - -static char debug_file_name[1024]; - -#ifdef __KERNEL__ -unsigned int libcfs_subsystem_debug = ~0; -EXPORT_SYMBOL(libcfs_subsystem_debug); - -unsigned int libcfs_debug = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE | - D_NETERROR | D_HA | D_CONFIG | D_IOCTL | - D_DLMTRACE | D_RPCTRACE | D_VFSTRACE); -EXPORT_SYMBOL(libcfs_debug); - -unsigned int libcfs_printk; -EXPORT_SYMBOL(libcfs_printk); - -unsigned int libcfs_console_ratelimit = 1; -EXPORT_SYMBOL(libcfs_console_ratelimit); - -unsigned int libcfs_debug_binary = 1; -EXPORT_SYMBOL(libcfs_debug_binary); - -unsigned int libcfs_stack; -EXPORT_SYMBOL(libcfs_stack); - -unsigned int portal_enter_debugger; -EXPORT_SYMBOL(portal_enter_debugger); - -unsigned int libcfs_catastrophe; -EXPORT_SYMBOL(libcfs_catastrophe); - -atomic_t libcfs_kmemory = ATOMIC_INIT(0); -EXPORT_SYMBOL(libcfs_kmemory); - -static cfs_waitq_t debug_ctlwq; - -char debug_file_path[1024] = "/tmp/lustre-log"; - -int libcfs_panic_in_progress; - -/* libcfs_debug_token2mask() expects the returned - * string in lower-case */ -const char * -libcfs_debug_subsys2str(int subsys) -{ - switch (subsys) { - default: - return NULL; - case S_UNDEFINED: - return "undefined"; - 
case S_MDC: - return "mdc"; - case S_MDS: - return "mds"; - case S_OSC: - return "osc"; - case S_OST: - return "ost"; - case S_CLASS: - return "class"; - case S_LOG: - return "log"; - case S_LLITE: - return "llite"; - case S_RPC: - return "rpc"; - case S_LNET: - return "lnet"; - case S_LND: - return "lnd"; - case S_PINGER: - return "pinger"; - case S_FILTER: - return "filter"; - case S_ECHO: - return "echo"; - case S_LDLM: - return "ldlm"; - case S_LOV: - return "lov"; - case S_LMV: - return "lmv"; - case S_SEC: - return "sec"; - case S_GSS: - return "gss"; - case S_MGC: - return "mgc"; - case S_MGS: - return "mgs"; - case S_FID: - return "fid"; - case S_FLD: - return "fld"; - } -} - -/* libcfs_debug_token2mask() expects the returned - * string in lower-case */ -const char * -libcfs_debug_dbg2str(int debug) -{ - switch (debug) { - default: - return NULL; - case D_TRACE: - return "trace"; - case D_INODE: - return "inode"; - case D_SUPER: - return "super"; - case D_EXT2: - return "ext2"; - case D_MALLOC: - return "malloc"; - case D_CACHE: - return "cache"; - case D_INFO: - return "info"; - case D_IOCTL: - return "ioctl"; - case D_NETERROR: - return "neterror"; - case D_NET: - return "net"; - case D_WARNING: - return "warning"; - case D_BUFFS: - return "buffs"; - case D_OTHER: - return "other"; - case D_DENTRY: - return "dentry"; - case D_NETTRACE: - return "nettrace"; - case D_PAGE: - return "page"; - case D_DLMTRACE: - return "dlmtrace"; - case D_ERROR: - return "error"; - case D_EMERG: - return "emerg"; - case D_HA: - return "ha"; - case D_RPCTRACE: - return "rpctrace"; - case D_VFSTRACE: - return "vfstrace"; - case D_READA: - return "reada"; - case D_MMAP: - return "mmap"; - case D_CONFIG: - return "config"; - case D_CONSOLE: - return "console"; - case D_QUOTA: - return "quota"; - case D_SEC: - return "sec"; - } -} - -int -libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) -{ - const char *(*fn)(int bit) = is_subsys ? 
libcfs_debug_subsys2str : - libcfs_debug_dbg2str; - int len = 0; - const char *token; - int bit; - int i; - - if (mask == 0) { /* "0" */ - if (size > 0) - str[0] = '0'; - len = 1; - } else { /* space-separated tokens */ - for (i = 0; i < 32; i++) { - bit = 1 << i; - - if ((mask & bit) == 0) - continue; - - token = fn(bit); - if (token == NULL) /* unused bit */ - continue; - - if (len > 0) { /* separator? */ - if (len < size) - str[len] = ' '; - len++; - } - - while (*token != 0) { - if (len < size) - str[len] = *token; - token++; - len++; - } - } - } - - /* terminate 'str' */ - if (len < size) - str[len] = 0; - else - str[size - 1] = 0; - - return len; -} - -int -libcfs_debug_token2mask(int *mask, const char *str, int len, int is_subsys) -{ - const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : - libcfs_debug_dbg2str; - int i; - int j; - int bit; - const char *token; - - /* match against known tokens */ - for (i = 0; i < 32; i++) { - bit = 1 << i; - - token = fn(bit); - if (token == NULL) /* unused? */ - continue; - - /* strcasecmp */ - for (j = 0; ; j++) { - if (j == len) { /* end of token */ - if (token[j] == 0) { - *mask = bit; - return 0; - } - break; - } - - if (token[j] == 0) - break; - - if (str[j] == token[j]) - continue; - - if (str[j] < 'A' || 'Z' < str[j]) - break; - - if (str[j] - 'A' + 'a' != token[j]) - break; - } - } - - return -EINVAL; /* no match */ -} - -int -libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) -{ - int m = 0; - int matched = 0; - char op = 0; - int n; - int t; - - /* <str> must be a list of debug tokens or numbers separated by - * whitespace and optionally an operator ('+' or '-'). If an operator - * appears first in <str>, '*mask' is used as the starting point - * (relative), otherwise 0 is used (absolute). An operator applies to - * all following tokens up to the next operator. 
*/ - - while (*str != 0) { - while (isspace(*str)) /* skip whitespace */ - str++; - - if (*str == 0) - break; - - if (*str == '+' || *str == '-') { - op = *str++; - - /* op on first token == relative */ - if (!matched) - m = *mask; - - while (isspace(*str)) /* skip whitespace */ - str++; - - if (*str == 0) /* trailing op */ - return -EINVAL; - } - - /* find token length */ - for (n = 0; str[n] != 0 && !isspace(str[n]); n++); - - /* match token */ - if (libcfs_debug_token2mask(&t, str, n, is_subsys) != 0) - return -EINVAL; - - matched = 1; - if (op == '-') - m &= ~t; - else - m |= t; - - str += n; - } - - if (!matched) - return -EINVAL; - - *mask = m; - return 0; -} - -void libcfs_debug_dumplog_internal(void *arg) -{ - CFS_DECL_JOURNAL_DATA; - - CFS_PUSH_JOURNAL; - - snprintf(debug_file_name, sizeof(debug_file_path) - 1, "%s.%ld.%ld", - debug_file_path, cfs_time_current_sec(), (long)arg); - printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); - tracefile_dump_all_pages(debug_file_name); - - CFS_POP_JOURNAL; -} - -int libcfs_debug_dumplog_thread(void *arg) -{ - cfs_daemonize(""); - libcfs_debug_dumplog_internal(arg); - cfs_waitq_signal(&debug_ctlwq); - return 0; -} - -void libcfs_debug_dumplog(void) -{ - int rc; - cfs_waitlink_t wait; - ENTRY; - - /* we're being careful to ensure that the kernel thread is - * able to set our state to running as it exits before we - * get to schedule() */ - cfs_waitlink_init(&wait); - set_current_state(TASK_INTERRUPTIBLE); - cfs_waitq_add(&debug_ctlwq, &wait); - - rc = cfs_kernel_thread(libcfs_debug_dumplog_thread, - (void *)(long)cfs_curproc_pid(), - CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) - printk(KERN_ERR "LustreError: cannot start log dump thread: " - "%d\n", rc); - else - cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE); - - /* be sure to teardown if kernel_thread() failed */ - cfs_waitq_del(&debug_ctlwq, &wait); - set_current_state(TASK_RUNNING); -} - -int libcfs_debug_init(unsigned long bufsize) -{ - 
int rc; - - cfs_waitq_init(&debug_ctlwq); - rc = tracefile_init(); - - if (rc == 0) - libcfs_register_panic_notifier(); - - return rc; -} - -int libcfs_debug_cleanup(void) -{ - libcfs_unregister_panic_notifier(); - tracefile_exit(); - return 0; -} - -int libcfs_debug_clear_buffer(void) -{ - trace_flush_pages(); - return 0; -} - -/* Debug markers, although printed by S_LNET - * should not be be marked as such. */ -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_UNDEFINED -int libcfs_debug_mark_buffer(char *text) -{ - CDEBUG(D_TRACE,"***************************************************\n"); - CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); - CDEBUG(D_TRACE,"***************************************************\n"); - - return 0; -} -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_LNET - -void libcfs_debug_set_level(unsigned int debug_level) -{ - printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n", - debug_level); - libcfs_debug = debug_level; -} - -EXPORT_SYMBOL(libcfs_debug_dumplog); -EXPORT_SYMBOL(libcfs_debug_set_level); - - -#else /* !__KERNEL__ */ - -#include <libcfs/libcfs.h> - -#ifdef HAVE_SYS_USER_H -# include <sys/user.h> -#endif - -#ifdef HAVE_CATAMOUNT_DATA_H -#include <catamount/data.h> -#include <catamount/lputs.h> - -static char source_nid[16]; -/* 0 indicates no messages to console, 1 is errors, > 1 is all debug messages */ -static int toconsole = 1; -unsigned int libcfs_console_ratelimit = 1; -#else /* !HAVE_CATAMOUNT_DATA_H */ -#ifdef HAVE_NETDB_H -#include <sys/utsname.h> -#endif /* HAVE_CATAMOUNT_DATA_H */ -struct utsname *tmp_utsname; -static char source_nid[sizeof(tmp_utsname->nodename)]; -#endif /* __KERNEL__ */ - -static int source_pid; -int smp_processor_id = 1; -char debug_file_path[1024]; -FILE *debug_file_fd; - -int portals_do_debug_dumplog(void *arg) -{ - printf("Look in %s\n", debug_file_name); - return 0; -} - - -void portals_debug_print(void) -{ - return; -} - - -void libcfs_debug_dumplog(void) -{ - printf("Look in 
%s\n", debug_file_name); - return; -} - -int libcfs_debug_init(unsigned long bufsize) -{ - char *debug_mask = NULL; - char *debug_subsys = NULL; - char *debug_filename; - -#ifdef HAVE_CATAMOUNT_DATA_H - char *debug_console = NULL; - char *debug_ratelimit = NULL; - - snprintf(source_nid, sizeof(source_nid) - 1, "%u", _my_pnid); - source_pid = _my_pid; - - debug_console = getenv("LIBLUSTRE_DEBUG_CONSOLE"); - if (debug_console != NULL) { - toconsole = strtoul(debug_console, NULL, 0); - CDEBUG(D_INFO, "set liblustre toconsole to %u\n", toconsole); - } - debug_ratelimit = getenv("LIBLUSTRE_DEBUG_CONSOLE_RATELIMIT"); - if (debug_ratelimit != NULL) { - libcfs_console_ratelimit = strtoul(debug_ratelimit, NULL, 0); - CDEBUG(D_INFO, "set liblustre console ratelimit to %u\n", libcfs_console_ratelimit); - } -#else - struct utsname myname; - - if (uname(&myname) == 0) - strcpy(source_nid, myname.nodename); - source_pid = getpid(); -#endif - /* debug masks */ - debug_mask = getenv("LIBLUSTRE_DEBUG_MASK"); - if (debug_mask) - libcfs_debug = (unsigned int) strtol(debug_mask, NULL, 0); - - debug_subsys = getenv("LIBLUSTRE_DEBUG_SUBSYS"); - if (debug_subsys) - libcfs_subsystem_debug = - (unsigned int) strtol(debug_subsys, NULL, 0); - - debug_filename = getenv("LIBLUSTRE_DEBUG_BASE"); - if (debug_filename) - strncpy(debug_file_path,debug_filename,sizeof(debug_file_path)); - - debug_filename = getenv("LIBLUSTRE_DEBUG_FILE"); - if (debug_filename) - strncpy(debug_file_name,debug_filename,sizeof(debug_file_path)); - - if (debug_file_name[0] == '\0' && debug_file_path[0] != '\0') - snprintf(debug_file_name, sizeof(debug_file_name) - 1, - "%s-%s-%lu.log", debug_file_path, source_nid, time(0)); - - if (strcmp(debug_file_name, "stdout") == 0 || - strcmp(debug_file_name, "-") == 0) { - debug_file_fd = stdout; - } else if (strcmp(debug_file_name, "stderr") == 0) { - debug_file_fd = stderr; - } else if (debug_file_name[0] != '\0') { - debug_file_fd = fopen(debug_file_name, "w"); - if 
(debug_file_fd == NULL) - fprintf(stderr, "%s: unable to open '%s': %s\n", - source_nid, debug_file_name, strerror(errno)); - } - - if (debug_file_fd == NULL) - debug_file_fd = stdout; - - return 0; -} - -int libcfs_debug_cleanup(void) -{ - if (debug_file_fd != stdout && debug_file_fd != stderr) - fclose(debug_file_fd); - return 0; -} - -int libcfs_debug_clear_buffer(void) -{ - return 0; -} - -int libcfs_debug_mark_buffer(char *text) -{ - - fprintf(debug_file_fd, "*******************************************************************************\n"); - fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text); - fprintf(debug_file_fd, "*******************************************************************************\n"); - - return 0; -} - -#ifdef HAVE_CATAMOUNT_DATA_H -#define CATAMOUNT_MAXLINE (256-4) -void catamount_printline(char *buf, size_t size) -{ - char *pos = buf; - int prsize = size; - - while (prsize > 0){ - lputs(pos); - pos += CATAMOUNT_MAXLINE; - prsize -= CATAMOUNT_MAXLINE; - } -} -#endif - -int -libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, - int subsys, int mask, - const char *file, const char *fn, const int line, - const char *format1, va_list args, - const char *format2, ...) 
-{ - struct timeval tv; - int nob; - int remain; - va_list ap; - char buf[PAGE_SIZE]; /* size 4096 used for compatimble with linux, - * where message can`t be exceed PAGE_SIZE */ - int console = 0; - char *prefix = "Lustre"; - -#ifdef HAVE_CATAMOUNT_DATA_H - /* toconsole == 0 - all messages to debug_file_fd - * toconsole == 1 - warnings to console, all to debug_file_fd - * toconsole > 1 - all debug to console */ - if ( ((mask & D_CANTMASK) && - (toconsole == 1)) || (toconsole > 1)) { - console = 1; - } -#endif - - if ((!console) && (!debug_file_fd)) { - return 0; - } - - if (mask & (D_EMERG | D_ERROR)) - prefix = "LustreError"; - - nob = snprintf(buf, sizeof(buf), "%s: %u-%s:(%s:%d:%s()): ", prefix, - source_pid, source_nid, file, line, fn); - - remain = sizeof(buf) - nob; - if (format1) { - nob += vsnprintf(&buf[nob], remain, format1, args); - } - - remain = sizeof(buf) - nob; - if ((format2) && (remain > 0)) { - va_start(ap, format2); - nob += vsnprintf(&buf[nob], remain, format2, ap); - va_end(ap); - } - -#ifdef HAVE_CATAMOUNT_DATA_H - if (console) { - /* check rate limit for console */ - if (cdls != NULL) { - cfs_time_t t = cdls->cdls_next + - cfs_time_seconds(CDEBUG_MAX_LIMIT + 10); - cfs_duration_t dmax = cfs_time_seconds(CDEBUG_MAX_LIMIT); - - if (libcfs_console_ratelimit && - cdls->cdls_next != 0 && /* not first time ever */ - !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { - - /* skipping a console message */ - cdls->cdls_count++; - goto out_file; - } - - if (cfs_time_after(cfs_time_current(), t)) { - /* last timeout was a long time ago */ - cdls->cdls_delay /= 8; - } else { - cdls->cdls_delay *= 2; - - if (cdls->cdls_delay < CFS_TICK) - cdls->cdls_delay = CFS_TICK; - else if (cdls->cdls_delay > dmax) - cdls->cdls_delay = dmax; - } - - /* ensure cdls_next is never zero after it's been seen */ - cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; - } - - if (cdls != NULL && cdls->cdls_count != 0) { - char buf2[100]; - - nob = 
snprintf(buf2, sizeof(buf2), - "Skipped %d previous similar message%s\n", - cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); - - catamount_printline(buf2, nob); - cdls->cdls_count = 0; - goto out_file; - } - catamount_printline(buf, nob); - } -out_file: - /* return on toconsole > 1, as we don't want the user getting - * spammed by the debug data */ - if (toconsole > 1) - return 0; -#endif - if (debug_file_fd == NULL) - return 0; - - gettimeofday(&tv, NULL); - - fprintf(debug_file_fd, "%lu.%06lu:%u:%s:(%s:%d:%s()): %s", - tv.tv_sec, tv.tv_usec, source_pid, source_nid, - file, line, fn, buf); - - return 0; -} - -void -libcfs_assertion_failed(const char *expr, const char *file, const char *func, - const int line) -{ - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, - "ASSERTION(%s) failed\n", expr); - abort(); -} - -#endif /* __KERNEL__ */ diff --git a/lnet/libcfs/libcfs.xcode/project.pbxproj b/lnet/libcfs/libcfs.xcode/project.pbxproj deleted file mode 100644 index 479c21b977925d925173db71f4d96a33b2487732..0000000000000000000000000000000000000000 --- a/lnet/libcfs/libcfs.xcode/project.pbxproj +++ /dev/null @@ -1,439 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = libcfs; - refType = 4; - sourceTree = "<group>"; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = "<group>"; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 19444794072D07AD00DAF9BC = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = tracefile.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19444795072D07AD00DAF9BC = { - fileRef = 19444794072D07AD00DAF9BC; - isa = PBXBuildFile; - settings = { - }; - }; - 19444796072D08AA00DAF9BC = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = debug.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19444797072D08AA00DAF9BC = { - fileRef = 19444796072D08AA00DAF9BC; - 
isa = PBXBuildFile; - settings = { - }; - }; - 19509C03072CD5FF00A958C3 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = module.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19509C04072CD5FF00A958C3 = { - fileRef = 19509C03072CD5FF00A958C3; - isa = PBXBuildFile; - settings = { - }; - }; - 19713B76072E8274004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_prim.c; - path = arch/xnu/cfs_prim.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713B77072E8274004E8469 = { - fileRef = 19713B76072E8274004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713BB7072E8281004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_mem.c; - path = arch/xnu/cfs_mem.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713BB8072E8281004E8469 = { - fileRef = 19713BB7072E8281004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713BF7072E828E004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_proc.c; - path = arch/xnu/cfs_proc.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713BF8072E828E004E8469 = { - fileRef = 19713BF7072E828E004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713C7A072E82B2004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_utils.c; - path = arch/xnu/cfs_utils.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713C7B072E82B2004E8469 = { - fileRef = 19713C7A072E82B2004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713CD6072E8A56004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_module.c; - path = arch/xnu/cfs_module.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713CD7072E8A56004E8469 = { - fileRef = 19713CD6072E8A56004E8469; - isa = PBXBuildFile; - settings = { - }; - 
}; - 19713D1B072E8E39004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_fs.c; - path = arch/xnu/cfs_fs.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713D1C072E8E39004E8469 = { - fileRef = 19713D1B072E8E39004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713D60072E9109004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = xnu_sync.c; - path = arch/xnu/xnu_sync.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713D61072E9109004E8469 = { - fileRef = 19713D60072E9109004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713DC2072F994D004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_tracefile.c; - path = arch/xnu/cfs_tracefile.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713DC3072F994D004E8469 = { - fileRef = 19713DC2072F994D004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19713E1C072FAFB5004E8469 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = cfs_debug.c; - path = arch/xnu/cfs_debug.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19713E1D072FAFB5004E8469 = { - fileRef = 19713E1C072FAFB5004E8469; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = "<group>"; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 19713E1C072FAFB5004E8469, - 19713DC2072F994D004E8469, - 19713D60072E9109004E8469, - 19713D1B072E8E39004E8469, - 19713CD6072E8A56004E8469, - 19713C7A072E82B2004E8469, - 19713BF7072E828E004E8469, - 19713BB7072E8281004E8469, - 19713B76072E8274004E8469, - 19444796072D08AA00DAF9BC, - 19444794072D07AD00DAF9BC, - 19509C03072CD5FF00A958C3, - ); - isa = PBXGroup; - name = Source; - path = 
""; - refType = 4; - sourceTree = "<group>"; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, - 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = ../include; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.libcfs; - MODULE_START = libcfs_start; - MODULE_STOP = libcfs_stop; - MODULE_VERSION = 1.0.1; - OTHER_CFLAGS = "-D__KERNEL__"; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = libcfs; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = libcfs; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = libcfs; - productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 19509C04072CD5FF00A958C3, - 19444795072D07AD00DAF9BC, - 19444797072D08AA00DAF9BC, - 19713B77072E8274004E8469, - 19713BB8072E8281004E8469, - 19713BF8072E828E004E8469, - 19713C7B072E82B2004E8469, - 19713CD7072E8A56004E8469, - 19713D1C072E8E39004E8469, - 19713D61072E9109004E8469, - 19713DC3072F994D004E8469, - 19713E1D072FAFB5004E8469, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = "<group>"; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = libcfs.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/libcfs/linux/.cvsignore b/lnet/libcfs/linux/.cvsignore deleted file mode 100644 index 2bc4137ea828524412757790d1c06c9ce24fec2c..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -Makefile -Makefile.in -*.o.cmd diff --git a/lnet/libcfs/linux/Makefile.am b/lnet/libcfs/linux/Makefile.am deleted file mode 100644 index 8bf35ccc7742ddb59c35df71b378309746440d5d..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/Makefile.am +++ /dev/null @@ -1,4 +0,0 @@ -EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \ - linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \ - linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c - diff --git a/lnet/libcfs/linux/linux-curproc.c b/lnet/libcfs/linux/linux-curproc.c deleted file mode 100644 index e446169b34b8da52ff8b7325e28c3394cedf9e9c..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-curproc.c +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre curproc API implementation for Linux kernel - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. 
Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - * Public License for more details. You should have received a copy of the GNU - * General Public License along with Lustre; if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/sched.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) - * for Linux kernel. - */ - -uid_t cfs_curproc_uid(void) -{ - return current->uid; -} - -gid_t cfs_curproc_gid(void) -{ - return current->gid; -} - -uid_t cfs_curproc_fsuid(void) -{ - return current->fsuid; -} - -gid_t cfs_curproc_fsgid(void) -{ - return current->fsgid; -} - -pid_t cfs_curproc_pid(void) -{ - return current->pid; -} - -int cfs_curproc_groups_nr(void) -{ - int nr; - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) - task_lock(current); - nr = current->group_info->ngroups; - task_unlock(current); -#else - nr = current->ngroups; -#endif - return nr; -} - -void cfs_curproc_groups_dump(gid_t *array, int size) -{ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) - task_lock(current); - size = min_t(int, size, current->group_info->ngroups); - memcpy(array, current->group_info->blocks[0], size * sizeof(__u32)); - task_unlock(current); -#else - LASSERT(size <= NGROUPS); - size = min_t(int, size, current->ngroups); - memcpy(array, current->groups, size * sizeof(__u32)); -#endif -} - - -int cfs_curproc_is_in_groups(gid_t gid) -{ - return in_group_p(gid); -} - -mode_t cfs_curproc_umask(void) -{ - return current->fs->umask; -} - -char *cfs_curproc_comm(void) -{ - return current->comm; -} - -cfs_kernel_cap_t cfs_curproc_cap_get(void) -{ - return current->cap_effective; -} - -void cfs_curproc_cap_set(cfs_kernel_cap_t cap) -{ - 
current->cap_effective = cap; -} - -EXPORT_SYMBOL(cfs_curproc_uid); -EXPORT_SYMBOL(cfs_curproc_pid); -EXPORT_SYMBOL(cfs_curproc_gid); -EXPORT_SYMBOL(cfs_curproc_fsuid); -EXPORT_SYMBOL(cfs_curproc_fsgid); -EXPORT_SYMBOL(cfs_curproc_umask); -EXPORT_SYMBOL(cfs_curproc_comm); -EXPORT_SYMBOL(cfs_curproc_groups_nr); -EXPORT_SYMBOL(cfs_curproc_groups_dump); -EXPORT_SYMBOL(cfs_curproc_is_in_groups); -EXPORT_SYMBOL(cfs_curproc_cap_get); -EXPORT_SYMBOL(cfs_curproc_cap_set); - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/linux/linux-debug.c b/lnet/libcfs/linux/linux-debug.c deleted file mode 100644 index 4a48ae63595ff6048419cad3a51bb57b934fbe36..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-debug.c +++ /dev/null @@ -1,237 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/notifier.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/interrupt.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/completion.h> - -#include <linux/fs.h> -#include <linux/stat.h> -#include <asm/uaccess.h> -#include <asm/segment.h> -#include <linux/miscdevice.h> -#include <linux/version.h> - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/linux/portals_compat25.h> -#include <libcfs/libcfs.h> - -#include "tracefile.h" - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include <linux/kallsyms.h> -#endif - -char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall"; - -void libcfs_run_upcall(char **argv) -{ - int rc; - int argc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - ENTRY; - - argv[0] = lnet_upcall; - argc = 1; - while (argv[argc] != NULL) - argc++; - - LASSERT(argc >= 2); - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0 && rc != -ENOENT) { - CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; " - "check /proc/sys/lnet/upcall\n", - rc, argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } else { - CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n", - argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? 
"" : ",..."); - } -} - -void libcfs_run_lbug_upcall(char *file, const char *fn, const int line) -{ - char *argv[6]; - char buf[32]; - - ENTRY; - snprintf (buf, sizeof buf, "%d", line); - - argv[1] = "LBUG"; - argv[2] = file; - argv[3] = (char *)fn; - argv[4] = buf; - argv[5] = NULL; - - libcfs_run_upcall (argv); -} - -#ifdef __arch_um__ -void lbug_with_loc(char *file, const char *func, const int line) -{ - libcfs_catastrophe = 1; - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, - "LBUG - trying to dump log to /tmp/lustre-log\n"); - libcfs_debug_dumplog(); - libcfs_run_lbug_upcall(file, func, line); - asm("int $3"); - panic("LBUG"); -} -#else -/* coverity[+kill] */ -void lbug_with_loc(char *file, const char *func, const int line) -{ - libcfs_catastrophe = 1; - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n"); - - if (in_interrupt()) { - panic("LBUG in interrupt.\n"); - /* not reached */ - } - - libcfs_debug_dumpstack(NULL); - libcfs_debug_dumplog(); - libcfs_run_lbug_upcall(file, func, line); - set_task_state(current, TASK_UNINTERRUPTIBLE); - while (1) - schedule(); -} -#endif /* __arch_um__ */ - -#ifdef __KERNEL__ - -void libcfs_debug_dumpstack(struct task_struct *tsk) -{ -#if defined(__arch_um__) - if (tsk != NULL) - CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n", - tsk->pid, UML_PID(tsk)); - //asm("int $3"); -#elif defined(HAVE_SHOW_TASK) - /* this is exported by lustre kernel version 42 */ - extern void show_task(struct task_struct *); - - if (tsk == NULL) - tsk = current; - CWARN("showing stack for process %d\n", tsk->pid); - show_task(tsk); -#else - CWARN("can't show stack: kernel doesn't export show_task\n"); - if ((tsk == NULL) || (tsk == current)) - dump_stack(); -#endif -} - -cfs_task_t *libcfs_current(void) -{ - CWARN("current task struct is %p\n", current); - return current; -} - -static int panic_notifier(struct notifier_block *self, unsigned long unused1, - void *unused2) -{ - if (libcfs_panic_in_progress) - return 0; 
- - libcfs_panic_in_progress = 1; - mb(); - -#ifdef LNET_DUMP_ON_PANIC - /* This is currently disabled because it spews far too much to the - * console on the rare cases it is ever triggered. */ - - if (in_interrupt()) { - trace_debug_print(); - } else { - while (current->lock_depth >= 0) - unlock_kernel(); - - libcfs_debug_dumplog_internal((void *)(long)cfs_curproc_pid()); - } -#endif - return 0; -} - -static struct notifier_block libcfs_panic_notifier = { - notifier_call : panic_notifier, - next : NULL, - priority : 10000 -}; - -void libcfs_register_panic_notifier(void) -{ -#ifdef HAVE_ATOMIC_PANIC_NOTIFIER - atomic_notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier); -#else - notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier); -#endif -} - -void libcfs_unregister_panic_notifier(void) -{ -#ifdef HAVE_ATOMIC_PANIC_NOTIFIER - atomic_notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier); -#else - notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier); -#endif -} - -EXPORT_SYMBOL(libcfs_debug_dumpstack); -EXPORT_SYMBOL(libcfs_current); - -#endif /* __KERNEL__ */ - -EXPORT_SYMBOL(libcfs_run_upcall); -EXPORT_SYMBOL(libcfs_run_lbug_upcall); -EXPORT_SYMBOL(lbug_with_loc); diff --git a/lnet/libcfs/linux/linux-fs.c b/lnet/libcfs/linux/linux-fs.c deleted file mode 100644 index 061944c4bb62f9eec481a14af821844f5953ecf9..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-fs.c +++ /dev/null @@ -1,115 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - -#include <linux/fs.h> -#include <linux/kdev_t.h> -#include <linux/ctype.h> -#include <asm/uaccess.h> - -#include <libcfs/libcfs.h> - -cfs_file_t * -cfs_filp_open (const char *name, int flags, int mode, int *err) -{ - /* XXX - * Maybe we need to handle flags and mode in the future - */ - cfs_file_t *filp = NULL; - - filp = filp_open(name, flags, mode); - if (IS_ERR(filp)) { - int rc; - - rc = PTR_ERR(filp); - printk(KERN_ERR "LustreError: can't open 
%s file: err %d\n", - name, rc); - if (err) - *err = rc; - filp = NULL; - } - return filp; -} - -/* write a userspace buffer to disk. - * NOTE: this returns 0 on success, not the number of bytes written. */ -ssize_t -cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset) -{ - mm_segment_t fs; - ssize_t size = 0; - - fs = get_fs(); - set_fs(KERNEL_DS); - while (count > 0) { - size = filp->f_op->write(filp, (char *)buf, count, offset); - if (size < 0) - break; - count -= size; - size = 0; - } - set_fs(fs); - - return size; -} - -cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor) -{ - return MKDEV(major, minor); -} - -cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev) -{ - return MAJOR(rdev); -} - -cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev) -{ - return MINOR(rdev); -} - -#if !(CFS_O_CREAT == O_CREAT && CFS_O_EXCL == O_EXCL && \ - CFS_O_TRUNC == O_TRUNC && CFS_O_APPEND == O_APPEND &&\ - CFS_O_NONBLOCK == O_NONBLOCK && CFS_O_NDELAY == O_NDELAY &&\ - CFS_O_SYNC == O_SYNC && CFS_O_ASYNC == FASYNC &&\ - CFS_O_DIRECT == O_DIRECT && CFS_O_LARGEFILE == O_LARGEFILE &&\ - CFS_O_DIRECTORY == O_DIRECTORY && CFS_O_NOFOLLOW == O_NOFOLLOW) - -int cfs_oflags2univ(int flags) -{ - int f; - - f = flags & O_ACCMODE; - f |= (flags & O_CREAT) ? CFS_O_CREAT: 0; - f |= (flags & O_EXCL) ? CFS_O_EXCL: 0; - f |= (flags & O_NOCTTY) ? CFS_O_NOCTTY: 0; - f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0; - f |= (flags & O_APPEND) ? CFS_O_APPEND: 0; - f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0; - f |= (flags & O_SYNC)? CFS_O_SYNC: 0; - f |= (flags & FASYNC)? CFS_O_ASYNC: 0; - f |= (flags & O_DIRECTORY)? CFS_O_DIRECTORY: 0; - f |= (flags & O_DIRECT)? CFS_O_DIRECT: 0; - f |= (flags & O_LARGEFILE)? CFS_O_LARGEFILE: 0; - f |= (flags & O_NOFOLLOW)? CFS_O_NOFOLLOW: 0; - f |= (flags & O_NOATIME)? CFS_O_NOATIME: 0; - return f; -} -#else - -int cfs_oflags2univ(int flags) -{ - return (flags); -} -#endif - -/* - * XXX Liang: we don't need cfs_univ2oflags() now. 
- */ -int cfs_univ2oflags(int flags) -{ - return (flags); -} - -EXPORT_SYMBOL(cfs_filp_open); -EXPORT_SYMBOL(cfs_user_write); -EXPORT_SYMBOL(cfs_oflags2univ); -EXPORT_SYMBOL(cfs_univ2oflags); diff --git a/lnet/libcfs/linux/linux-lock.c b/lnet/libcfs/linux/linux-lock.c deleted file mode 100644 index 01511d6337a1ae5229c408e54daf2f51761b8b4b..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-lock.c +++ /dev/null @@ -1,4 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - -#include <arch-linux/cfs_lock.h> -#include <libcfs/libcfs.h> diff --git a/lnet/libcfs/linux/linux-lwt.c b/lnet/libcfs/linux/linux-lwt.c deleted file mode 100644 index 520c54ce68573bf1c5df463fdc2583df116429a1..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-lwt.c +++ /dev/null @@ -1,2 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - diff --git a/lnet/libcfs/linux/linux-mem.c b/lnet/libcfs/linux/linux-mem.c deleted file mode 100644 index f327814f69fbe19c927d1d1f3621a74191b19a0c..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-mem.c +++ /dev/null @@ -1,133 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <linux/mm.h> -#include <linux/vmalloc.h> -#include <linux/slab.h> -#include <linux/highmem.h> -#include <libcfs/libcfs.h> - -static unsigned int cfs_alloc_flags_to_gfp(u_int32_t flags) -{ - unsigned int mflags = 0; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - if (flags & CFS_ALLOC_ATOMIC) - mflags |= __GFP_HIGH; - else if (flags & CFS_ALLOC_WAIT) - mflags |= __GFP_WAIT; - else - mflags |= (__GFP_HIGH | __GFP_WAIT); - if (flags & CFS_ALLOC_IO) - mflags |= __GFP_IO | __GFP_HIGHIO; -#else - if (flags & CFS_ALLOC_ATOMIC) - mflags |= __GFP_HIGH; - else - mflags |= __GFP_WAIT; - if (flags & CFS_ALLOC_NOWARN) - mflags |= __GFP_NOWARN; - if (flags & CFS_ALLOC_IO) - mflags |= __GFP_IO; -#endif - if (flags & CFS_ALLOC_FS) - mflags |= __GFP_FS; - return mflags; -} - -void * -cfs_alloc(size_t nr_bytes, u_int32_t flags) -{ - void *ptr = NULL; - - ptr = kmalloc(nr_bytes, cfs_alloc_flags_to_gfp(flags)); - if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) - memset(ptr, 0, nr_bytes); - return ptr; -} - -void -cfs_free(void *addr) -{ - kfree(addr); -} - -void * -cfs_alloc_large(size_t nr_bytes) -{ - return vmalloc(nr_bytes); -} - -void -cfs_free_large(void *addr) -{ - vfree(addr); -} - -cfs_page_t *cfs_alloc_page(unsigned int flags) -{ - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. 
- */ - return alloc_pages(cfs_alloc_flags_to_gfp(flags), 0); -} - -cfs_mem_cache_t * -cfs_mem_cache_create (const char *name, size_t size, size_t offset, - unsigned long flags) -{ - return kmem_cache_create(name, size, offset, flags, NULL, NULL); -} - -int -cfs_mem_cache_destroy (cfs_mem_cache_t * cachep) -{ -#ifdef HAVE_KMEM_CACHE_DESTROY_INT - return kmem_cache_destroy(cachep); -#else - kmem_cache_destroy(cachep); - return 0; -#endif -} - -void * -cfs_mem_cache_alloc(cfs_mem_cache_t *cachep, int flags) -{ - return kmem_cache_alloc(cachep, cfs_alloc_flags_to_gfp(flags)); -} - -void -cfs_mem_cache_free(cfs_mem_cache_t *cachep, void *objp) -{ - return kmem_cache_free(cachep, objp); -} - -EXPORT_SYMBOL(cfs_alloc); -EXPORT_SYMBOL(cfs_free); -EXPORT_SYMBOL(cfs_alloc_large); -EXPORT_SYMBOL(cfs_free_large); -EXPORT_SYMBOL(cfs_alloc_page); -EXPORT_SYMBOL(cfs_mem_cache_create); -EXPORT_SYMBOL(cfs_mem_cache_destroy); -EXPORT_SYMBOL(cfs_mem_cache_alloc); -EXPORT_SYMBOL(cfs_mem_cache_free); diff --git a/lnet/libcfs/linux/linux-module.c b/lnet/libcfs/linux/linux-module.c deleted file mode 100644 index 6f21853bfc387be7438b722a64433785fa6fe93f..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-module.c +++ /dev/null @@ -1,151 +0,0 @@ -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#define LNET_MINOR 240 - -int libcfs_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct libcfs_ioctl_hdr *hdr; - struct libcfs_ioctl_data *data; - int err; - ENTRY; - - hdr = (struct libcfs_ioctl_hdr *)buf; - data = (struct libcfs_ioctl_data *)buf; - - err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); - if (err) - RETURN(err); - - if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { - CERROR("PORTALS: version mismatch kernel vs application\n"); - RETURN(-EINVAL); - } - - if (hdr->ioc_len + buf >= end) { - CERROR("PORTALS: user buffer exceeds kernel buffer\n"); - RETURN(-EINVAL); - } - - - if (hdr->ioc_len < sizeof(struct 
libcfs_ioctl_data)) { - CERROR("PORTALS: user buffer too small for ioctl\n"); - RETURN(-EINVAL); - } - - err = copy_from_user(buf, (void *)arg, hdr->ioc_len); - if (err) - RETURN(err); - - if (libcfs_ioctl_is_invalid(data)) { - CERROR("PORTALS: ioctl not correctly formatted\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1) - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - - if (data->ioc_inllen2) - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1); - - RETURN(0); -} - -int libcfs_ioctl_popdata(void *arg, void *data, int size) -{ - if (copy_to_user((char *)arg, data, size)) - return -EFAULT; - return 0; -} - -extern struct cfs_psdev_ops libcfs_psdev_ops; - -static int -libcfs_psdev_open(struct inode * inode, struct file * file) -{ - struct libcfs_device_userstate **pdu = NULL; - int rc = 0; - - if (!inode) - return (-EINVAL); - pdu = (struct libcfs_device_userstate **)&file->private_data; - if (libcfs_psdev_ops.p_open != NULL) - rc = libcfs_psdev_ops.p_open(0, (void *)pdu); - else - return (-EPERM); - return rc; -} - -/* called when closing /dev/device */ -static int -libcfs_psdev_release(struct inode * inode, struct file * file) -{ - struct libcfs_device_userstate *pdu; - int rc = 0; - - if (!inode) - return (-EINVAL); - pdu = file->private_data; - if (libcfs_psdev_ops.p_close != NULL) - rc = libcfs_psdev_ops.p_close(0, (void *)pdu); - else - rc = -EPERM; - return rc; -} - -static int -libcfs_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct cfs_psdev_file pfile; - int rc = 0; - - if (current->fsuid != 0) - return -EACCES; - - if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || - _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || - _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) { - CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - return (-EINVAL); - } - - /* Handle platform-dependent IOC requests */ - switch (cmd) { - case IOC_LIBCFS_PANIC: - if (!capable 
(CAP_SYS_BOOT)) - return (-EPERM); - panic("debugctl-invoked panic"); - return (0); - case IOC_LIBCFS_MEMHOG: - if (!capable (CAP_SYS_ADMIN)) - return -EPERM; - /* go thought */ - } - - pfile.off = 0; - pfile.private_data = file->private_data; - if (libcfs_psdev_ops.p_ioctl != NULL) - rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); - else - rc = -EPERM; - return (rc); -} - -static struct file_operations libcfs_fops = { - ioctl: libcfs_ioctl, - open: libcfs_psdev_open, - release: libcfs_psdev_release -}; - -cfs_psdev_t libcfs_dev = { - LNET_MINOR, - "lnet", - &libcfs_fops -}; - - diff --git a/lnet/libcfs/linux/linux-prim.c b/lnet/libcfs/linux/linux-prim.c deleted file mode 100644 index fe5d61f710fe7c85f22bbc8ca5790982b60fd45c..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-prim.c +++ /dev/null @@ -1,154 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <libcfs/libcfs.h> - -#if defined(CONFIG_KGDB) -#include <asm/kgdb.h> -#endif - -void cfs_enter_debugger(void) -{ -#if defined(CONFIG_KGDB) - BREAKPOINT(); -#elif defined(__arch_um__) - asm("int $3"); -#else - /* nothing */ -#endif -} - -void cfs_daemonize(char *str) { - unsigned long flags; - - lock_kernel(); -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63)) - daemonize(str); -#else - daemonize(); - exit_files(current); - reparent_to_init(); - snprintf (current->comm, sizeof (current->comm), "%s", str); -#endif - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - unlock_kernel(); -} - -int cfs_daemonize_ctxt(char *str) { - struct task_struct *tsk = current; - struct fs_struct *fs = NULL; - - cfs_daemonize(str); - fs = copy_fs_struct(tsk->fs); - if (fs == NULL) - return -ENOMEM; - exit_fs(tsk); - tsk->fs = fs; - return 0; -} - - -sigset_t -cfs_get_blockedsigs(void) -{ - unsigned long flags; - sigset_t old; - - SIGNAL_MASK_LOCK(current, flags); - old = current->blocked; - SIGNAL_MASK_UNLOCK(current, flags); - return old; -} - -sigset_t -cfs_block_allsigs(void) -{ - unsigned long flags; - sigset_t old; - - SIGNAL_MASK_LOCK(current, flags); - old = current->blocked; - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - - return old; -} - -sigset_t -cfs_block_sigs(sigset_t bits) -{ - unsigned long flags; - sigset_t old; - - SIGNAL_MASK_LOCK(current, flags); - old = current->blocked; - current->blocked = bits; - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - return old; -} - -void -cfs_restore_sigs (cfs_sigset_t old) -{ - unsigned long flags; - - SIGNAL_MASK_LOCK(current, flags); - current->blocked = old; - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); -} - -int 
-cfs_signal_pending(void) -{ - return signal_pending(current); -} - -void -cfs_clear_sigpending(void) -{ - unsigned long flags; - - SIGNAL_MASK_LOCK(current, flags); - CLEAR_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); -} - -int -libcfs_arch_init(void) -{ - return 0; -} - -void -libcfs_arch_cleanup(void) -{ - return; -} - -EXPORT_SYMBOL(libcfs_arch_init); -EXPORT_SYMBOL(libcfs_arch_cleanup); -EXPORT_SYMBOL(cfs_daemonize); -EXPORT_SYMBOL(cfs_daemonize_ctxt); -EXPORT_SYMBOL(cfs_block_allsigs); -EXPORT_SYMBOL(cfs_block_sigs); -EXPORT_SYMBOL(cfs_get_blockedsigs); -EXPORT_SYMBOL(cfs_restore_sigs); -EXPORT_SYMBOL(cfs_signal_pending); -EXPORT_SYMBOL(cfs_clear_sigpending); diff --git a/lnet/libcfs/linux/linux-proc.c b/lnet/libcfs/linux/linux-proc.c deleted file mode 100644 index 3efdd467391fbb1d4ea1ff944176f12f8bd3af57..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-proc.c +++ /dev/null @@ -1,217 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <net/sock.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <asm/uaccess.h> -#include <asm/segment.h> - -#include <linux/proc_fs.h> -#include <linux/sysctl.h> - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <asm/div64.h> -#include "tracefile.h" - -static struct ctl_table_header *lnet_table_header = NULL; -extern char lnet_upcall[1024]; - -#define PSDEV_LNET (0x100) -enum { - PSDEV_DEBUG = 1, /* control debugging */ - PSDEV_SUBSYSTEM_DEBUG, /* control debugging */ - PSDEV_PRINTK, /* force all messages to console */ - PSDEV_CONSOLE_RATELIMIT, /* ratelimit console messages */ - PSDEV_DEBUG_PATH, /* crashdump log location */ - PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ - PSDEV_LNET_UPCALL, /* User mode upcall script */ - PSDEV_LNET_MEMUSED, /* bytes currently PORTAL_ALLOCated */ - PSDEV_LNET_CATASTROPHE, /* if we have LBUGged or panic'd */ -}; - -int LL_PROC_PROTO(proc_dobitmasks); - -static struct ctl_table lnet_table[] = { - {PSDEV_DEBUG, "debug", &libcfs_debug, sizeof(int), 0644, NULL, - &proc_dobitmasks}, - {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &libcfs_subsystem_debug, - sizeof(int), 0644, NULL, &proc_dobitmasks}, - {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL, - &proc_dobitmasks}, - {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit",&libcfs_console_ratelimit, - sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_DEBUG_PATH, "debug_path", debug_file_path, - sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string}, - 
{PSDEV_LNET_UPCALL, "upcall", lnet_upcall, - sizeof(lnet_upcall), 0644, NULL, &proc_dostring, - &sysctl_string}, - {PSDEV_LNET_MEMUSED, "memused", (int *)&libcfs_kmemory.counter, - sizeof(int), 0444, NULL, &proc_dointvec}, - {PSDEV_LNET_CATASTROPHE, "catastrophe", &libcfs_catastrophe, - sizeof(int), 0444, NULL, &proc_dointvec}, - {0} -}; - -static struct ctl_table top_table[2] = { - {PSDEV_LNET, "lnet", NULL, 0, 0555, lnet_table}, - {0} -}; - -int LL_PROC_PROTO(proc_dobitmasks) -{ - const int tmpstrlen = 512; - char *str; - int rc = 0; - /* the proc filling api stumps me always, coax proc_dointvec - * and proc_dostring into doing the drudgery by cheating - * with a dummy ctl_table - */ - struct ctl_table dummy = *table; - unsigned int *mask = (unsigned int *)table->data; - int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; - - str = kmalloc(tmpstrlen, GFP_USER); - if (str == NULL) - return -ENOMEM; - - if (write) { - size_t oldlen = *lenp; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,8) - loff_t oldpos = *ppos; -#endif - - dummy.proc_handler = &proc_dointvec; - - /* old proc interface allows user to specify just an int - * value; be compatible and don't break userland. 
- */ - rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); - - if (rc != -EINVAL) - goto out; - - /* using new interface */ - dummy.data = str; - dummy.maxlen = tmpstrlen; - dummy.proc_handler = &proc_dostring; - - /* proc_dointvec might have changed these */ - *lenp = oldlen; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,8) - *ppos = oldpos; -#endif - - rc = ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos); - - if (rc != 0) - goto out; - - rc = libcfs_debug_str2mask(mask, dummy.data, is_subsys); - } else { - dummy.data = str; - dummy.maxlen = tmpstrlen; - dummy.proc_handler = &proc_dostring; - - libcfs_debug_mask2str(dummy.data, dummy.maxlen,*mask,is_subsys); - - rc = ll_proc_dostring(&dummy, write, filp, buffer, lenp, ppos); - } - -out: - kfree(str); - return rc; -} - -int insert_proc(void) -{ - struct proc_dir_entry *ent; - -#ifdef CONFIG_SYSCTL - if (!lnet_table_header) - lnet_table_header = register_sysctl_table(top_table, 0); -#endif - - ent = create_proc_entry("sys/lnet/dump_kernel", 0, NULL); - if (ent == NULL) { - CERROR("couldn't register dump_kernel\n"); - return -1; - } - ent->write_proc = trace_dk; - - ent = create_proc_entry("sys/lnet/daemon_file", 0, NULL); - if (ent == NULL) { - CERROR("couldn't register daemon_file\n"); - return -1; - } - ent->write_proc = trace_write_daemon_file; - ent->read_proc = trace_read_daemon_file; - - ent = create_proc_entry("sys/lnet/debug_mb", 0, NULL); - if (ent == NULL) { - CERROR("couldn't register debug_mb\n"); - return -1; - } - ent->write_proc = trace_write_debug_mb; - ent->read_proc = trace_read_debug_mb; - - return 0; -} - -void remove_proc(void) -{ - remove_proc_entry("sys/lnet/dump_kernel", NULL); - remove_proc_entry("sys/lnet/daemon_file", NULL); - remove_proc_entry("sys/lnet/debug_mb", NULL); - -#ifdef CONFIG_SYSCTL - if (lnet_table_header) - unregister_sysctl_table(lnet_table_header); - lnet_table_header = NULL; -#endif -} diff --git a/lnet/libcfs/linux/linux-sync.c 
b/lnet/libcfs/linux/linux-sync.c deleted file mode 100644 index 520c54ce68573bf1c5df463fdc2583df116429a1..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-sync.c +++ /dev/null @@ -1,2 +0,0 @@ -# define DEBUG_SUBSYSTEM S_LNET - diff --git a/lnet/libcfs/linux/linux-tcpip.c b/lnet/libcfs/linux/linux-tcpip.c deleted file mode 100644 index 9cb85efa13a2bef5124408746310e8dde664db1b..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-tcpip.c +++ /dev/null @@ -1,687 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> - -#include <linux/if.h> -#include <linux/in.h> -#include <linux/file.h> -/* For sys_open & sys_close */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -#include <linux/syscalls.h> -#else -#include <linux/fs.h> -#endif - -int -libcfs_sock_ioctl(int cmd, unsigned long arg) -{ - mm_segment_t oldmm = get_fs(); - struct socket *sock; - int fd; - int rc; - struct file *sock_filp; - - rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return rc; - } - - fd = sock_map_fd(sock); - if (fd < 0) { - rc = fd; - sock_release(sock); - goto out; - } - - sock_filp = fget(fd); - if (!sock_filp) { - rc = -ENOMEM; - goto out_fd; - } - - set_fs(KERNEL_DS); -#ifdef HAVE_UNLOCKED_IOCTL - if (sock_filp->f_op->unlocked_ioctl) - rc = sock_filp->f_op->unlocked_ioctl(sock_filp, cmd, arg); - else -#endif - { - lock_kernel(); - rc =sock_filp->f_op->ioctl(sock_filp->f_dentry->d_inode, - sock_filp, cmd, arg); - unlock_kernel(); - } - set_fs(oldmm); - - fput(sock_filp); - - out_fd: - sys_close(fd); - out: - return rc; -} - -int -libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) -{ - struct ifreq ifr; - int nob; - int rc; - __u32 val; - - nob = strnlen(name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - CERROR("Interface name %s too long\n", name); - rc = -EINVAL; - goto out; - } - - CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); - - strcpy(ifr.ifr_name, name); - rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get flags for interface %s\n", name); - goto out; - } - - if ((ifr.ifr_flags & IFF_UP) == 0) { - CDEBUG(D_NET, "Interface %s down\n", name); - *up = 0; - *ip = *mask = 0; - goto out; - } - - *up = 1; - - strcpy(ifr.ifr_name, name); - ifr.ifr_addr.sa_family = AF_INET; - rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get IP address for 
interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; - *ip = ntohl(val); - - strcpy(ifr.ifr_name, name); - ifr.ifr_addr.sa_family = AF_INET; - rc = libcfs_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr); - - if (rc != 0) { - CERROR("Can't get netmask for interface %s\n", name); - goto out; - } - - val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr; - *mask = ntohl(val); - - out: - return rc; -} - -EXPORT_SYMBOL(libcfs_ipif_query); - -int -libcfs_ipif_enumerate (char ***namesp) -{ - /* Allocate and fill in 'names', returning # interfaces/error */ - char **names; - int toobig; - int nalloc; - int nfound; - struct ifreq *ifr; - struct ifconf ifc; - int rc; - int nob; - int i; - - - nalloc = 16; /* first guess at max interfaces */ - toobig = 0; - for (;;) { - if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { - toobig = 1; - nalloc = CFS_PAGE_SIZE/sizeof(*ifr); - CWARN("Too many interfaces: only enumerating first %d\n", - nalloc); - } - - LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); - if (ifr == NULL) { - CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc); - rc = -ENOMEM; - goto out0; - } - - ifc.ifc_buf = (char *)ifr; - ifc.ifc_len = nalloc * sizeof(*ifr); - - rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc); - - if (rc < 0) { - CERROR ("Error %d enumerating interfaces\n", rc); - goto out1; - } - - LASSERT (rc == 0); - - nfound = ifc.ifc_len/sizeof(*ifr); - LASSERT (nfound <= nalloc); - - if (nfound < nalloc || toobig) - break; - - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - nalloc *= 2; - } - - if (nfound == 0) - goto out1; - - LIBCFS_ALLOC(names, nfound * sizeof(*names)); - if (names == NULL) { - rc = -ENOMEM; - goto out1; - } - /* NULL out all names[i] */ - memset (names, 0, nfound * sizeof(*names)); - - for (i = 0; i < nfound; i++) { - - nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); - if (nob == IFNAMSIZ) { - /* no space for terminating NULL */ - CERROR("interface name %.*s too long (%d max)\n", 
- nob, ifr[i].ifr_name, IFNAMSIZ); - rc = -ENAMETOOLONG; - goto out2; - } - - LIBCFS_ALLOC(names[i], IFNAMSIZ); - if (names[i] == NULL) { - rc = -ENOMEM; - goto out2; - } - - memcpy(names[i], ifr[i].ifr_name, nob); - names[i][nob] = 0; - } - - *namesp = names; - rc = nfound; - - out2: - if (rc < 0) - libcfs_ipif_free_enumeration(names, nfound); - out1: - LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); - out0: - return rc; -} - -EXPORT_SYMBOL(libcfs_ipif_enumerate); - -void -libcfs_ipif_free_enumeration (char **names, int n) -{ - int i; - - LASSERT (n > 0); - - for (i = 0; i < n && names[i] != NULL; i++) - LIBCFS_FREE(names[i], IFNAMSIZ); - - LIBCFS_FREE(names, n * sizeof(*names)); -} - -EXPORT_SYMBOL(libcfs_ipif_free_enumeration); - -int -libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - mm_segment_t oldmm = get_fs(); - long ticks = timeout * HZ; - unsigned long then; - struct timeval tv; - - LASSERT (nob > 0); - /* Caller may pass a zero timeout if she thinks the socket buffer is - * empty enough to take the whole message immediately */ - - for (;;) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = (timeout == 0) ? 
MSG_DONTWAIT : 0 - }; - - if (timeout != 0) { - /* Set send timeout to remaining time */ - tv = (struct timeval) { - .tv_sec = ticks / HZ, - .tv_usec = ((ticks % HZ) * 1000000) / HZ - }; - set_fs(KERNEL_DS); - rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, - (char *)&tv, sizeof(tv)); - set_fs(oldmm); - if (rc != 0) { - CERROR("Can't set socket send timeout " - "%ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - } - - set_fs (KERNEL_DS); - then = jiffies; - rc = sock_sendmsg (sock, &msg, iov.iov_len); - ticks -= jiffies - then; - set_fs (oldmm); - - if (rc == nob) - return 0; - - if (rc < 0) - return rc; - - if (rc == 0) { - CERROR ("Unexpected zero rc\n"); - return (-ECONNABORTED); - } - - if (ticks <= 0) - return -EAGAIN; - - buffer = ((char *)buffer) + rc; - nob -= rc; - } - - return (0); -} -EXPORT_SYMBOL(libcfs_sock_write); - -int -libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - mm_segment_t oldmm = get_fs(); - long ticks = timeout * HZ; - unsigned long then; - struct timeval tv; - - LASSERT (nob > 0); - LASSERT (ticks > 0); - - for (;;) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - - /* Set receive timeout to remaining time */ - tv = (struct timeval) { - .tv_sec = ticks / HZ, - .tv_usec = ((ticks % HZ) * 1000000) / HZ - }; - set_fs(KERNEL_DS); - rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, - (char *)&tv, sizeof(tv)); - set_fs(oldmm); - if (rc != 0) { - CERROR("Can't set socket recv timeout %ld.%06d: %d\n", - (long)tv.tv_sec, (int)tv.tv_usec, rc); - return rc; - } - - set_fs(KERNEL_DS); - then = jiffies; - rc = sock_recvmsg(sock, &msg, iov.iov_len, 0); - ticks -= jiffies - then; - set_fs(oldmm); - - if (rc < 0) - return rc; - - if (rc == 0) - return -ECONNRESET; - - buffer = ((char *)buffer) 
+ rc; - nob -= rc; - - if (nob == 0) - return 0; - - if (ticks <= 0) - return -ETIMEDOUT; - } -} - -EXPORT_SYMBOL(libcfs_sock_read); - -static int -libcfs_sock_create (struct socket **sockp, int *fatal, - __u32 local_ip, int local_port) -{ - struct sockaddr_in locaddr; - struct socket *sock; - int rc; - int option; - mm_segment_t oldmm = get_fs(); - - /* All errors are fatal except bind failure if the port is in use */ - *fatal = 1; - - rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); - *sockp = sock; - if (rc != 0) { - CERROR ("Can't create socket: %d\n", rc); - return (rc); - } - - set_fs (KERNEL_DS); - option = 1; - rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); - goto failed; - } - - if (local_ip != 0 || local_port != 0) { - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(local_port); - locaddr.sin_addr.s_addr = (local_ip == 0) ? 
- INADDR_ANY : htonl(local_ip); - - rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr, - sizeof(locaddr)); - if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *fatal = 0; - goto failed; - } - if (rc != 0) { - CERROR("Error trying to bind to port %d: %d\n", - local_port, rc); - goto failed; - } - } - - return 0; - - failed: - sock_release(sock); - return rc; -} - -int -libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize) -{ - mm_segment_t oldmm = get_fs(); - int option; - int rc; - - if (txbufsize != 0) { - option = txbufsize; - set_fs (KERNEL_DS); - rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - option, rc); - return (rc); - } - } - - if (rxbufsize != 0) { - option = rxbufsize; - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - option, rc); - return (rc); - } - } - - return 0; -} - -EXPORT_SYMBOL(libcfs_sock_setbuf); - -int -libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port) -{ - struct sockaddr_in sin; - int len = sizeof (sin); - int rc; - - rc = sock->ops->getname (sock, (struct sockaddr *)&sin, &len, - remote ? 2 : 0); - if (rc != 0) { - CERROR ("Error %d getting sock %s IP/port\n", - rc, remote ? 
"peer" : "local"); - return rc; - } - - if (ip != NULL) - *ip = ntohl (sin.sin_addr.s_addr); - - if (port != NULL) - *port = ntohs (sin.sin_port); - - return 0; -} - -EXPORT_SYMBOL(libcfs_sock_getaddr); - -int -libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize) -{ - - if (txbufsize != NULL) { - *txbufsize = sock->sk->sk_sndbuf; - } - - if (rxbufsize != NULL) { - *rxbufsize = sock->sk->sk_rcvbuf; - } - - return 0; -} - -EXPORT_SYMBOL(libcfs_sock_getbuf); - -int -libcfs_sock_listen (struct socket **sockp, - __u32 local_ip, int local_port, int backlog) -{ - int fatal; - int rc; - - rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port); - if (rc != 0) { - if (!fatal) - CERROR("Can't create socket: port %d already in use\n", - local_port); - return rc; - } - - rc = (*sockp)->ops->listen(*sockp, backlog); - if (rc == 0) - return 0; - - CERROR("Can't set listen backlog %d: %d\n", backlog, rc); - sock_release(*sockp); - return rc; -} - -EXPORT_SYMBOL(libcfs_sock_listen); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) -int sock_create_lite(int family, int type, int protocol, struct socket **res) -{ - int err = 0; - struct socket *sock; - - sock = sock_alloc(); - if (!sock) { - err = -ENOMEM; - goto out; - } - sock->type = type; -out: - *res = sock; - return err; -} -#endif - -int -libcfs_sock_accept (struct socket **newsockp, struct socket *sock) -{ - wait_queue_t wait; - struct socket *newsock; - int rc; - - init_waitqueue_entry(&wait, current); - - /* XXX this should add a ref to sock->ops->owner, if - * TCP could be a module */ - rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock); - if (rc) { - CERROR("Can't allocate socket\n"); - return rc; - } - - newsock->ops = sock->ops; - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(sock->sk->sk_sleep, &wait); - - rc = sock->ops->accept(sock, newsock, O_NONBLOCK); - if (rc == -EAGAIN) { - /* Nothing ready, so wait for activity */ - schedule(); - rc = 
sock->ops->accept(sock, newsock, O_NONBLOCK); - } - - remove_wait_queue(sock->sk->sk_sleep, &wait); - set_current_state(TASK_RUNNING); - - if (rc != 0) - goto failed; - - *newsockp = newsock; - return 0; - - failed: - sock_release(newsock); - return rc; -} - -EXPORT_SYMBOL(libcfs_sock_accept); - -void -libcfs_sock_abort_accept (struct socket *sock) -{ - wake_up_all(sock->sk->sk_sleep); -} - -EXPORT_SYMBOL(libcfs_sock_abort_accept); - -int -libcfs_sock_connect (struct socket **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) -{ - struct sockaddr_in srvaddr; - int rc; - - rc = libcfs_sock_create(sockp, fatal, local_ip, local_port); - if (rc != 0) - return rc; - - memset (&srvaddr, 0, sizeof (srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(peer_port); - srvaddr.sin_addr.s_addr = htonl(peer_ip); - - rc = (*sockp)->ops->connect(*sockp, - (struct sockaddr *)&srvaddr, sizeof(srvaddr), - 0); - if (rc == 0) - return 0; - - /* EADDRNOTAVAIL probably means we're already connected to the same - * peer/port on the same local port on a differently typed - * connection. Let our caller retry with a different local - * port... */ - *fatal = !(rc == -EADDRNOTAVAIL); - - CDEBUG(*fatal ? 
D_NETERROR : D_NET, - "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); - - sock_release(*sockp); - return rc; -} - -EXPORT_SYMBOL(libcfs_sock_connect); - -void -libcfs_sock_release (struct socket *sock) -{ - sock_release(sock); -} - -EXPORT_SYMBOL(libcfs_sock_release); diff --git a/lnet/libcfs/linux/linux-tracefile.c b/lnet/libcfs/linux/linux-tracefile.c deleted file mode 100644 index 1fb38cfbe09fc7c2ab95f576fdf92c06c989160f..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-tracefile.c +++ /dev/null @@ -1,327 +0,0 @@ -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -#ifndef get_cpu -#define get_cpu() smp_processor_id() -#define put_cpu() do { } while (0) -#endif - -extern union trace_data_union trace_data[NR_CPUS]; -extern char *tracefile; -extern long long tracefile_size; - -char *trace_console_buffers[NR_CPUS][3]; - -struct rw_semaphore tracefile_sem; - -int tracefile_init_arch() -{ - int i; - int j; - - init_rwsem(&tracefile_sem); - - for (i = 0; i < NR_CPUS; i++) - for (j = 0; j < 3; j++) { - trace_console_buffers[i][j] = - kmalloc(TRACE_CONSOLE_BUFFER_SIZE, - GFP_KERNEL); - - if (trace_console_buffers[i][j] == NULL) { - tracefile_fini_arch(); - printk(KERN_ERR - "Can't allocate " - "console message buffer\n"); - return -ENOMEM; - } - } - - return 0; -} - -void tracefile_fini_arch() -{ - int i; - int j; - - for (i = 0; i < NR_CPUS; i++) - for (j = 0; j < 3; j++) - if (trace_console_buffers[i][j] != NULL) { - kfree(trace_console_buffers[i][j]); - trace_console_buffers[i][j] = NULL; - } -} - -void tracefile_read_lock() -{ - down_read(&tracefile_sem); -} - -void tracefile_read_unlock() -{ - up_read(&tracefile_sem); -} - -void tracefile_write_lock() -{ - down_write(&tracefile_sem); -} - -void tracefile_write_unlock() -{ - up_write(&tracefile_sem); -} - -char * 
-trace_get_console_buffer(void) -{ - int cpu = get_cpu(); - int idx; - - if (in_irq()) { - idx = 0; - } else if (in_softirq()) { - idx = 1; - } else { - idx = 2; - } - - return trace_console_buffers[cpu][idx]; -} - -void -trace_put_console_buffer(char *buffer) -{ - put_cpu(); -} - -struct trace_cpu_data * -trace_get_tcd(void) -{ - int cpu; - - if (in_interrupt()) /* no logging in IRQ context */ - return NULL; - - cpu = get_cpu(); - return &trace_data[cpu].tcd; -} - -void -trace_put_tcd (struct trace_cpu_data *tcd) -{ - __LASSERT (!in_interrupt()); - put_cpu(); -} - -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) -{ - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - return tcd->tcd_cpu == tage->cpu; -} - -void -set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, - const int line, unsigned long stack) -{ - struct timeval tv; - - do_gettimeofday(&tv); - - header->ph_subsys = subsys; - header->ph_mask = mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_sec = (__u32)tv.tv_sec; - header->ph_usec = tv.tv_usec; - header->ph_stack = stack; - header->ph_pid = current->pid; - header->ph_line_num = line; -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - header->ph_extern_pid = current->thread.extern_pid; -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - header->ph_extern_pid = current->thread.mode.tt.extern_pid; -#else - header->ph_extern_pid = 0; -#endif - return; -} - -void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn) -{ - char *prefix = "Lustre", *ptype = NULL; - - if ((mask & D_EMERG) != 0) { - prefix = "LustreError"; - ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { - prefix = "LustreError"; - ptype = KERN_ERR; - } else if ((mask & D_WARNING) != 0) { - prefix = "Lustre"; - ptype = KERN_WARNING; - } 
else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) { - prefix = "Lustre"; - ptype = KERN_INFO; - } - - if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %.*s", ptype, prefix, len, buf); - } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid, - hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf); - } - return; -} - -int trace_write_daemon_file(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - char *name; - unsigned long off; - int rc; - - name = kmalloc(count + 1, GFP_KERNEL); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, buffer, count)) { - rc = -EFAULT; - goto out; - } - - /* be nice and strip out trailing '\n' */ - for (off = count ; off > 2 && isspace(name[off - 1]); off--) - ; - - name[off] = '\0'; - - tracefile_write_lock(); - if (strcmp(name, "stop") == 0) { - tracefile = NULL; - trace_stop_thread(); - goto out_sem; - } else if (strncmp(name, "size=", 5) == 0) { - tracefile_size = simple_strtoul(name + 5, NULL, 0); - if (tracefile_size < 10 || tracefile_size > 20480) - tracefile_size = TRACEFILE_SIZE; - else - tracefile_size <<= 20; - goto out_sem; - } - - if (name[0] != '/') { - rc = -EINVAL; - goto out_sem; - } - - if (tracefile != NULL) - kfree(tracefile); - - tracefile = name; - name = NULL; - printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " - "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10)); - - trace_start_thread(); -out_sem: - tracefile_write_unlock(); -out: - kfree(name); - return count; -} - -int trace_read_daemon_file(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - int rc; - - tracefile_read_lock(); - rc = snprintf(page, count, "%s", tracefile); - tracefile_read_unlock(); - - return rc; -} - -int trace_write_debug_mb(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - char string[32]; - int i; - unsigned max; - - if (count >= sizeof(string)) { - printk(KERN_ERR "Lustre: 
value too large (length %lu bytes)\n", - count); - return -EOVERFLOW; - } - - if (copy_from_user(string, buffer, count)) - return -EFAULT; - - max = simple_strtoul(string, NULL, 0); - if (max == 0) - return -EINVAL; - - if (max > (num_physpages >> (20 - 2 - CFS_PAGE_SHIFT)) / 5 || max >= 512) { - printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " - "%dMB, which is more than 80%% of available RAM (%lu)\n", - max, (num_physpages >> (20 - 2 - CFS_PAGE_SHIFT)) / 5); - return -EINVAL; - } - - max /= smp_num_cpus; - - for (i = 0; i < NR_CPUS; i++) { - struct trace_cpu_data *tcd; - tcd = &trace_data[i].tcd; - tcd->tcd_max_pages = max << (20 - CFS_PAGE_SHIFT); - } - return count; -} - -int trace_read_debug_mb(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct trace_cpu_data *tcd; - int rc; - - tcd = trace_get_tcd(); - __LASSERT (tcd != NULL); - - rc = snprintf(page, count, "%lu\n", - (tcd->tcd_max_pages >> (20 - CFS_PAGE_SHIFT)) * smp_num_cpus); - - trace_put_tcd(tcd); - return rc; -} - -void -trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) -{ - cpumask_t cpus_allowed = current->cpus_allowed; - /* use cpus_allowed to quiet 2.4 UP kernel warning only */ - cpumask_t m = cpus_allowed; - int cpu; - - /* Run the given routine on every CPU in thread context */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu)) - continue; - - cpus_clear(m); - cpu_set(cpu, m); - set_cpus_allowed(current, m); - - fn(arg); - - set_cpus_allowed(current, cpus_allowed); - } -} diff --git a/lnet/libcfs/linux/linux-utils.c b/lnet/libcfs/linux/linux-utils.c deleted file mode 100644 index 60f7cb879aabd86fc8a0b1fcf32ae85bc777e36b..0000000000000000000000000000000000000000 --- a/lnet/libcfs/linux/linux-utils.c +++ /dev/null @@ -1,60 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. 
- * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -/* - * miscellaneous libcfs stuff - */ -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/types.h> - -/* - * Convert server error code to client format. Error codes are from - * Linux errno.h, so for Linux client---identity. - */ -int convert_server_error(__u64 ecode) -{ - return ecode; -} -EXPORT_SYMBOL(convert_server_error); - -/* - * convert <fcntl.h> flag from client to server. - */ -int convert_client_oflag(int cflag, int *result) -{ - *result = cflag; - return 0; -} -EXPORT_SYMBOL(convert_client_oflag); - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{} - -EXPORT_SYMBOL(cfs_stack_trace_fill); - -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - return NULL; -} -EXPORT_SYMBOL(cfs_stack_trace_frame); - diff --git a/lnet/libcfs/lwt.c b/lnet/libcfs/lwt.c deleted file mode 100644 index 3ed5d453f42ec96b0d3238f36a5f3d0de2d287fb..0000000000000000000000000000000000000000 --- a/lnet/libcfs/lwt.c +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2003 Cluster File Systems, Inc. - * Author: Eric Barton <eeb@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#ifdef HAVE_KERNEL_CONFIG_H -#include <linux/config.h> -#endif -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/kernel.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/interrupt.h> -#include <asm/system.h> -#include <asm/uaccess.h> - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> - -#if LWT_SUPPORT - -#if !KLWT_SUPPORT -int lwt_enabled; -lwt_cpu_t lwt_cpus[NR_CPUS]; -#endif - -int lwt_pages_per_cpu; - -/* NB only root is allowed to retrieve LWT info; it's an open door into the - * kernel... */ - -int -lwt_lookup_string (int *size, char *knl_ptr, - char *user_ptr, int user_size) -{ - int maxsize = 128; - - /* knl_ptr was retrieved from an LWT snapshot and the caller wants to - * turn it into a string. NB we can crash with an access violation - * trying to determine the string length, so we're trusting our - * caller... 
*/ - - if (!capable(CAP_SYS_ADMIN)) - return (-EPERM); - - if (user_size > 0 && - maxsize > user_size) - maxsize = user_size; - - *size = strnlen (knl_ptr, maxsize - 1) + 1; - - if (user_ptr != NULL) { - if (user_size < 4) - return (-EINVAL); - - if (copy_to_user (user_ptr, knl_ptr, *size)) - return (-EFAULT); - - /* Did I truncate the string? */ - if (knl_ptr[*size - 1] != 0) - copy_to_user (user_ptr + *size - 4, "...", 4); - } - - return (0); -} - -int -lwt_control (int enable, int clear) -{ - lwt_page_t *p; - int i; - int j; - - if (!capable(CAP_SYS_ADMIN)) - return (-EPERM); - - if (!enable) { - LWT_EVENT(0,0,0,0); - lwt_enabled = 0; - mb(); - /* give people some time to stop adding traces */ - schedule_timeout(10); - } - - for (i = 0; i < num_online_cpus(); i++) { - p = lwt_cpus[i].lwtc_current_page; - - if (p == NULL) - return (-ENODATA); - - if (!clear) - continue; - - for (j = 0; j < lwt_pages_per_cpu; j++) { - memset (p->lwtp_events, 0, CFS_PAGE_SIZE); - - p = list_entry (p->lwtp_list.next, - lwt_page_t, lwtp_list); - } - } - - if (enable) { - lwt_enabled = 1; - mb(); - LWT_EVENT(0,0,0,0); - } - - return (0); -} - -int -lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, - void *user_ptr, int user_size) -{ - const int events_per_page = CFS_PAGE_SIZE / sizeof(lwt_event_t); - const int bytes_per_page = events_per_page * sizeof(lwt_event_t); - lwt_page_t *p; - int i; - int j; - - if (!capable(CAP_SYS_ADMIN)) - return (-EPERM); - - *ncpu = num_online_cpus(); - *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page; - *now = get_cycles(); - - if (user_ptr == NULL) - return (0); - - for (i = 0; i < num_online_cpus(); i++) { - p = lwt_cpus[i].lwtc_current_page; - - if (p == NULL) - return (-ENODATA); - - for (j = 0; j < lwt_pages_per_cpu; j++) { - if (copy_to_user(user_ptr, p->lwtp_events, - bytes_per_page)) - return (-EFAULT); - - user_ptr = ((char *)user_ptr) + bytes_per_page; - p = list_entry(p->lwtp_list.next, - lwt_page_t, lwtp_list); - - 
} - } - - return (0); -} - -int -lwt_init () -{ - int i; - int j; - - for (i = 0; i < num_online_cpus(); i++) - if (lwt_cpus[i].lwtc_current_page != NULL) - return (-EALREADY); - - LASSERT (!lwt_enabled); - - /* NULL pointers, zero scalars */ - memset (lwt_cpus, 0, sizeof (lwt_cpus)); - lwt_pages_per_cpu = LWT_MEMORY / (num_online_cpus() * CFS_PAGE_SIZE); - - for (i = 0; i < num_online_cpus(); i++) - for (j = 0; j < lwt_pages_per_cpu; j++) { - struct page *page = alloc_page (GFP_KERNEL); - lwt_page_t *lwtp; - - if (page == NULL) { - CERROR ("Can't allocate page\n"); - lwt_fini (); - return (-ENOMEM); - } - - LIBCFS_ALLOC(lwtp, sizeof (*lwtp)); - if (lwtp == NULL) { - CERROR ("Can't allocate lwtp\n"); - __free_page(page); - lwt_fini (); - return (-ENOMEM); - } - - lwtp->lwtp_page = page; - lwtp->lwtp_events = page_address(page); - memset (lwtp->lwtp_events, 0, CFS_PAGE_SIZE); - - if (j == 0) { - INIT_LIST_HEAD (&lwtp->lwtp_list); - lwt_cpus[i].lwtc_current_page = lwtp; - } else { - list_add (&lwtp->lwtp_list, - &lwt_cpus[i].lwtc_current_page->lwtp_list); - } - } - - lwt_enabled = 1; - mb(); - - LWT_EVENT(0,0,0,0); - - return (0); -} - -void -lwt_fini () -{ - int i; - - lwt_control(0, 0); - - for (i = 0; i < num_online_cpus(); i++) - while (lwt_cpus[i].lwtc_current_page != NULL) { - lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page; - - if (list_empty (&lwtp->lwtp_list)) { - lwt_cpus[i].lwtc_current_page = NULL; - } else { - lwt_cpus[i].lwtc_current_page = - list_entry (lwtp->lwtp_list.next, - lwt_page_t, lwtp_list); - - list_del (&lwtp->lwtp_list); - } - - __free_page (lwtp->lwtp_page); - LIBCFS_FREE (lwtp, sizeof (*lwtp)); - } -} - -EXPORT_SYMBOL(lwt_enabled); -EXPORT_SYMBOL(lwt_cpus); - -EXPORT_SYMBOL(lwt_init); -EXPORT_SYMBOL(lwt_fini); -EXPORT_SYMBOL(lwt_lookup_string); -EXPORT_SYMBOL(lwt_control); -EXPORT_SYMBOL(lwt_snapshot); -#endif diff --git a/lnet/libcfs/misc.c b/lnet/libcfs/misc.c deleted file mode 100644 index 
0ace40d27add4895be15089525ae6ca664331375..0000000000000000000000000000000000000000 --- a/lnet/libcfs/misc.c +++ /dev/null @@ -1,53 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - -/* - * On-wire format is native kdev_t format of Linux kernel 2.6 - */ -enum { - WIRE_RDEV_MINORBITS = 20, - WIRE_RDEV_MINORMASK = ((1U << WIRE_RDEV_MINORBITS) - 1) -}; - -cfs_wire_rdev_t cfs_wire_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor) -{ - return (major << WIRE_RDEV_MINORBITS) | minor; -} - -cfs_major_nr_t cfs_wire_rdev_major(cfs_wire_rdev_t rdev) -{ - return rdev >> WIRE_RDEV_MINORBITS; -} - -cfs_minor_nr_t cfs_wire_rdev_minor(cfs_wire_rdev_t rdev) -{ - return rdev & WIRE_RDEV_MINORMASK; -} - diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c deleted file mode 100644 index 5e273cbc0adfbe94fc953b769d075df1ee64e342..0000000000000000000000000000000000000000 --- a/lnet/libcfs/module.c +++ /dev/null @@ -1,423 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> -#include <lnet/lnet.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -void -kportal_memhog_free (struct libcfs_device_userstate *ldu) -{ - cfs_page_t **level0p = &ldu->ldu_memhog_root_page; - cfs_page_t **level1p; - cfs_page_t **level2p; - int count1; - int count2; - - if (*level0p != NULL) { - - level1p = (cfs_page_t **)cfs_page_address(*level0p); - count1 = 0; - - while (count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) && - *level1p != NULL) { - - level2p = (cfs_page_t **)cfs_page_address(*level1p); - count2 = 0; - - while (count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) && - *level2p != NULL) { - - cfs_free_page(*level2p); - ldu->ldu_memhog_pages--; - level2p++; - count2++; - } - - cfs_free_page(*level1p); - ldu->ldu_memhog_pages--; - level1p++; - count1++; - } - - cfs_free_page(*level0p); - ldu->ldu_memhog_pages--; - - *level0p = NULL; - } - - LASSERT (ldu->ldu_memhog_pages == 0); -} - -int -kportal_memhog_alloc (struct libcfs_device_userstate *ldu, int npages, int flags) -{ - cfs_page_t **level0p; - cfs_page_t **level1p; - cfs_page_t **level2p; - int count1; - int count2; - - LASSERT (ldu->ldu_memhog_pages == 0); - LASSERT (ldu->ldu_memhog_root_page == NULL); - - if (npages < 0) - return -EINVAL; - - if (npages == 0) - return 0; - - level0p = &ldu->ldu_memhog_root_page; - *level0p = cfs_alloc_page(flags); - if (*level0p == NULL) - return -ENOMEM; - ldu->ldu_memhog_pages++; - - level1p = (cfs_page_t **)cfs_page_address(*level0p); - count1 = 0; - memset(level1p, 0, CFS_PAGE_SIZE); - - while (ldu->ldu_memhog_pages < npages && - count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) { - - if (cfs_signal_pending()) - return (-EINTR); - - *level1p = cfs_alloc_page(flags); - if (*level1p == NULL) - return -ENOMEM; - ldu->ldu_memhog_pages++; - - level2p = (cfs_page_t **)cfs_page_address(*level1p); - count2 = 0; - memset(level2p, 0, CFS_PAGE_SIZE); - - while 
(ldu->ldu_memhog_pages < npages && - count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) { - - if (cfs_signal_pending()) - return (-EINTR); - - *level2p = cfs_alloc_page(flags); - if (*level2p == NULL) - return (-ENOMEM); - ldu->ldu_memhog_pages++; - - level2p++; - count2++; - } - - level1p++; - count1++; - } - - return 0; -} - -/* called when opening /dev/device */ -static int libcfs_psdev_open(unsigned long flags, void *args) -{ - struct libcfs_device_userstate *ldu; - ENTRY; - - PORTAL_MODULE_USE; - - LIBCFS_ALLOC(ldu, sizeof(*ldu)); - if (ldu != NULL) { - ldu->ldu_memhog_pages = 0; - ldu->ldu_memhog_root_page = NULL; - } - *(struct libcfs_device_userstate **)args = ldu; - - RETURN(0); -} - -/* called when closing /dev/device */ -static int libcfs_psdev_release(unsigned long flags, void *args) -{ - struct libcfs_device_userstate *ldu; - ENTRY; - - ldu = (struct libcfs_device_userstate *)args; - if (ldu != NULL) { - kportal_memhog_free(ldu); - LIBCFS_FREE(ldu, sizeof(*ldu)); - } - - PORTAL_MODULE_UNUSE; - RETURN(0); -} - -static struct rw_semaphore ioctl_list_sem; -static struct list_head ioctl_list; - -int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) -{ - int rc = 0; - - down_write(&ioctl_list_sem); - if (!list_empty(&hand->item)) - rc = -EBUSY; - else - list_add_tail(&hand->item, &ioctl_list); - up_write(&ioctl_list_sem); - - return rc; -} -EXPORT_SYMBOL(libcfs_register_ioctl); - -int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) -{ - int rc = 0; - - down_write(&ioctl_list_sem); - if (list_empty(&hand->item)) - rc = -ENOENT; - else - list_del_init(&hand->item); - up_write(&ioctl_list_sem); - - return rc; -} -EXPORT_SYMBOL(libcfs_deregister_ioctl); - -static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg) -{ - char buf[1024]; - int err = -EINVAL; - struct libcfs_ioctl_data *data; - ENTRY; - - /* 'cmd' and permissions get checked in our arch-specific caller */ - - if (libcfs_ioctl_getdata(buf, buf + 800, (void 
*)arg)) { - CERROR("PORTALS ioctl: data error\n"); - RETURN(-EINVAL); - } - data = (struct libcfs_ioctl_data *)buf; - - switch (cmd) { - case IOC_LIBCFS_CLEAR_DEBUG: - libcfs_debug_clear_buffer(); - RETURN(0); - /* - * case IOC_LIBCFS_PANIC: - * Handled in arch/cfs_module.c - */ - case IOC_LIBCFS_MARK_DEBUG: - if (data->ioc_inlbuf1 == NULL || - data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') - RETURN(-EINVAL); - libcfs_debug_mark_buffer(data->ioc_inlbuf1); - RETURN(0); -#if LWT_SUPPORT - case IOC_LIBCFS_LWT_CONTROL: - err = lwt_control ((data->ioc_flags & 1) != 0, - (data->ioc_flags & 2) != 0); - break; - - case IOC_LIBCFS_LWT_SNAPSHOT: { - cycles_t now; - int ncpu; - int total_size; - - err = lwt_snapshot (&now, &ncpu, &total_size, - data->ioc_pbuf1, data->ioc_plen1); - data->ioc_u64[0] = now; - data->ioc_u32[0] = ncpu; - data->ioc_u32[1] = total_size; - - /* Hedge against broken user/kernel typedefs (e.g. cycles_t) */ - data->ioc_u32[2] = sizeof(lwt_event_t); - data->ioc_u32[3] = offsetof(lwt_event_t, lwte_where); - - if (err == 0 && - libcfs_ioctl_popdata(arg, data, sizeof (*data))) - err = -EFAULT; - break; - } - - case IOC_LIBCFS_LWT_LOOKUP_STRING: - err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1, - data->ioc_pbuf2, data->ioc_plen2); - if (err == 0 && - libcfs_ioctl_popdata(arg, data, sizeof (*data))) - err = -EFAULT; - break; -#endif - case IOC_LIBCFS_MEMHOG: - if (pfile->private_data == NULL) { - err = -EINVAL; - } else { - kportal_memhog_free(pfile->private_data); - /* XXX The ioc_flags is not GFP flags now, need to be fixed */ - err = kportal_memhog_alloc(pfile->private_data, - data->ioc_count, - data->ioc_flags); - if (err != 0) - kportal_memhog_free(pfile->private_data); - } - break; - - case IOC_LIBCFS_PING_TEST: { - extern void (kping_client)(struct libcfs_ioctl_data *); - void (*ping)(struct libcfs_ioctl_data *); - - CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n", - data->ioc_count, libcfs_nid2str(data->ioc_nid), - 
libcfs_nid2str(data->ioc_nid)); - ping = PORTAL_SYMBOL_GET(kping_client); - if (!ping) - CERROR("PORTAL_SYMBOL_GET failed\n"); - else { - ping(data); - PORTAL_SYMBOL_PUT(kping_client); - } - RETURN(0); - } - - default: { - struct libcfs_ioctl_handler *hand; - err = -EINVAL; - down_read(&ioctl_list_sem); - list_for_each_entry(hand, &ioctl_list, item) { - err = hand->handle_ioctl(cmd, data); - if (err != -EINVAL) { - if (err == 0) - err = libcfs_ioctl_popdata(arg, - data, sizeof (*data)); - break; - } - } - up_read(&ioctl_list_sem); - break; - } - } - - RETURN(err); -} - -struct cfs_psdev_ops libcfs_psdev_ops = { - libcfs_psdev_open, - libcfs_psdev_release, - NULL, - NULL, - libcfs_ioctl -}; - -extern int insert_proc(void); -extern void remove_proc(void); -MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>"); -MODULE_DESCRIPTION("Portals v3.1"); -MODULE_LICENSE("GPL"); - -extern cfs_psdev_t libcfs_dev; -extern struct rw_semaphore tracefile_sem; -extern struct semaphore trace_thread_sem; - -extern void libcfs_init_nidstrings(void); -extern int libcfs_arch_init(void); -extern void libcfs_arch_cleanup(void); - -static int init_libcfs_module(void) -{ - int rc; - - libcfs_arch_init(); - libcfs_init_nidstrings(); - init_rwsem(&tracefile_sem); - init_mutex(&trace_thread_sem); - init_rwsem(&ioctl_list_sem); - CFS_INIT_LIST_HEAD(&ioctl_list); - - rc = libcfs_debug_init(5 * 1024 * 1024); - if (rc < 0) { - printk(KERN_ERR "LustreError: libcfs_debug_init: %d\n", rc); - return (rc); - } - -#if LWT_SUPPORT - rc = lwt_init(); - if (rc != 0) { - CERROR("lwt_init: error %d\n", rc); - goto cleanup_debug; - } -#endif - rc = cfs_psdev_register(&libcfs_dev); - if (rc) { - CERROR("misc_register: error %d\n", rc); - goto cleanup_lwt; - } - - rc = insert_proc(); - if (rc) { - CERROR("insert_proc: error %d\n", rc); - goto cleanup_deregister; - } - - CDEBUG (D_OTHER, "portals setup OK\n"); - return (0); - - cleanup_deregister: - cfs_psdev_deregister(&libcfs_dev); - cleanup_lwt: -#if 
LWT_SUPPORT - lwt_fini(); - cleanup_debug: -#endif - libcfs_debug_cleanup(); - return rc; -} - -static void exit_libcfs_module(void) -{ - int rc; - - remove_proc(); - - CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n", - atomic_read(&libcfs_kmemory)); - - rc = cfs_psdev_deregister(&libcfs_dev); - if (rc) - CERROR("misc_deregister error %d\n", rc); - -#if LWT_SUPPORT - lwt_fini(); -#endif - - if (atomic_read(&libcfs_kmemory) != 0) - CERROR("Portals memory leaked: %d bytes\n", - atomic_read(&libcfs_kmemory)); - - rc = libcfs_debug_cleanup(); - if (rc) - printk(KERN_ERR "LustreError: libcfs_debug_cleanup: %d\n", rc); - libcfs_arch_cleanup(); -} - -cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module); diff --git a/lnet/libcfs/nidstrings.c b/lnet/libcfs/nidstrings.c deleted file mode 100644 index 78a255d89e6a61a5e29c12d3a022b67b76464bc3..0000000000000000000000000000000000000000 --- a/lnet/libcfs/nidstrings.c +++ /dev/null @@ -1,533 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lnet.h> -#include <libcfs/kp30.h> -#ifndef __KERNEL__ -#ifdef HAVE_GETHOSTBYNAME -# include <netdb.h> -#endif -#endif - -/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids - * consistent in all conversion functions. Some code fragments are copied - * around for the sake of clarity... - */ - -/* CAVEAT EMPTOR! Racey temporary buffer allocation! - * Choose the number of nidstrings to support the MAXIMUM expected number of - * concurrent users. If there are more, the returned string will be volatile. - * NB this number must allow for a process to be descheduled for a timeslice - * between getting its string and using it. - */ - -#define LNET_NIDSTR_COUNT 128 /* # of nidstrings */ -#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ - -static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE]; -static int libcfs_nidstring_idx = 0; - -#ifdef __KERNEL__ -static spinlock_t libcfs_nidstring_lock; - -void libcfs_init_nidstrings (void) -{ - spin_lock_init(&libcfs_nidstring_lock); -} - -# define NIDSTR_LOCK(f) spin_lock_irqsave(&libcfs_nidstring_lock, f) -# define NIDSTR_UNLOCK(f) spin_unlock_irqrestore(&libcfs_nidstring_lock, f) -#else -# define NIDSTR_LOCK(f) (f=0) /* avoid unused var warnings */ -# define NIDSTR_UNLOCK(f) (f=0) -#endif - -static char * -libcfs_next_nidstring (void) -{ - char *str; - unsigned long flags; - - NIDSTR_LOCK(flags); - - str = libcfs_nidstrings[libcfs_nidstring_idx++]; - if (libcfs_nidstring_idx == - sizeof(libcfs_nidstrings)/sizeof(libcfs_nidstrings[0])) - libcfs_nidstring_idx = 0; - - NIDSTR_UNLOCK(flags); - return str; -} - -static int libcfs_lo_str2addr(char *str, int nob, __u32 *addr); -static void libcfs_ip_addr2str(__u32 addr, char *str); -static int libcfs_ip_str2addr(char *str, int nob, __u32 *addr); -static void libcfs_decnum_addr2str(__u32 addr, char *str); -static void 
libcfs_hexnum_addr2str(__u32 addr, char *str); -static int libcfs_num_str2addr(char *str, int nob, __u32 *addr); - -struct netstrfns { - int nf_type; - char *nf_name; - char *nf_modname; - void (*nf_addr2str)(__u32 addr, char *str); - int (*nf_str2addr)(char *str, int nob, __u32 *addr); -}; - -static struct netstrfns libcfs_netstrfns[] = { - {/* .nf_type */ LOLND, - /* .nf_name */ "lo", - /* .nf_modname */ "klolnd", - /* .nf_addr2str */ libcfs_decnum_addr2str, - /* .nf_str2addr */ libcfs_lo_str2addr}, - {/* .nf_type */ SOCKLND, - /* .nf_name */ "tcp", - /* .nf_modname */ "ksocklnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ O2IBLND, - /* .nf_name */ "o2ib", - /* .nf_modname */ "ko2iblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ CIBLND, - /* .nf_name */ "cib", - /* .nf_modname */ "kciblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ OPENIBLND, - /* .nf_name */ "openib", - /* .nf_modname */ "kopeniblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ IIBLND, - /* .nf_name */ "iib", - /* .nf_modname */ "kiiblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ VIBLND, - /* .nf_name */ "vib", - /* .nf_modname */ "kviblnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ RALND, - /* .nf_name */ "ra", - /* .nf_modname */ "kralnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ QSWLND, - /* .nf_name */ "elan", - /* .nf_modname */ "kqswlnd", - /* .nf_addr2str */ libcfs_decnum_addr2str, - /* .nf_str2addr */ libcfs_num_str2addr}, - {/* .nf_type */ GMLND, - /* .nf_name */ "gm", - /* .nf_modname */ "kgmlnd", - /* .nf_addr2str */ libcfs_hexnum_addr2str, - /* .nf_str2addr */ 
libcfs_num_str2addr}, - {/* .nf_type */ MXLND, - /* .nf_name */ "mx", - /* .nf_modname */ "kmxlnd", - /* .nf_addr2str */ libcfs_ip_addr2str, - /* .nf_str2addr */ libcfs_ip_str2addr}, - {/* .nf_type */ PTLLND, - /* .nf_name */ "ptl", - /* .nf_modname */ "kptllnd", - /* .nf_addr2str */ libcfs_decnum_addr2str, - /* .nf_str2addr */ libcfs_num_str2addr}, - /* placeholder for net0 alias. It MUST BE THE LAST ENTRY */ - {/* .nf_type */ -1}, -}; - -const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]); - -int -libcfs_lo_str2addr(char *str, int nob, __u32 *addr) -{ - *addr = 0; - return 1; -} - -void -libcfs_ip_addr2str(__u32 addr, char *str) -{ -#if 0 /* never lookup */ -#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME - __u32 netip = htonl(addr); - struct hostent *he = gethostbyaddr(&netip, sizeof(netip), AF_INET); - - if (he != NULL) { - snprintf(str, LNET_NIDSTR_SIZE, "%s", he->h_name); - return; - } -#endif -#endif - snprintf(str, LNET_NIDSTR_SIZE, "%u.%u.%u.%u", - (addr >> 24) & 0xff, (addr >> 16) & 0xff, - (addr >> 8) & 0xff, addr & 0xff); -} - -/* CAVEAT EMPTOR XscanfX - * I use "%n" at the end of a sscanf format to detect trailing junk. However - * sscanf may return immediately if it sees the terminating '0' in a string, so - * I initialise the %n variable to the expected length. If sscanf sets it; - * fine, if it doesn't, then the scan ended at the end of the string, which is - * fine too :) */ - -int -libcfs_ip_str2addr(char *str, int nob, __u32 *addr) -{ - int a; - int b; - int c; - int d; - int n = nob; /* XscanfX */ - - /* numeric IP? */ - if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 && - n == nob && - (a & ~0xff) == 0 && (b & ~0xff) == 0 && - (c & ~0xff) == 0 && (d & ~0xff) == 0) { - *addr = ((a<<24)|(b<<16)|(c<<8)|d); - return 1; - } - -#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME - /* known hostname? 
*/ - if (('a' <= str[0] && str[0] <= 'z') || - ('A' <= str[0] && str[0] <= 'Z')) { - char *tmp; - - LIBCFS_ALLOC(tmp, nob + 1); - if (tmp != NULL) { - struct hostent *he; - - memcpy(tmp, str, nob); - tmp[nob] = 0; - - he = gethostbyname(tmp); - - LIBCFS_FREE(tmp, nob); - - if (he != NULL) { - __u32 ip = *(__u32 *)he->h_addr; - - *addr = ntohl(ip); - return 1; - } - } - } -#endif - return 0; -} - -void -libcfs_decnum_addr2str(__u32 addr, char *str) -{ - snprintf(str, LNET_NIDSTR_SIZE, "%u", addr); -} - -void -libcfs_hexnum_addr2str(__u32 addr, char *str) -{ - snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr); -} - -int -libcfs_num_str2addr(char *str, int nob, __u32 *addr) -{ - int n; - - n = nob; - if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob) - return 1; - - n = nob; - if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob) - return 1; - - n = nob; - if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob) - return 1; - - return 0; -} - -struct netstrfns * -libcfs_lnd2netstrfns(int lnd) -{ - int i; - - if (lnd >= 0) - for (i = 0; i < libcfs_nnetstrfns; i++) - if (lnd == libcfs_netstrfns[i].nf_type) - return &libcfs_netstrfns[i]; - - return NULL; -} - -struct netstrfns * -libcfs_name2netstrfns(char *name) -{ - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) - if (libcfs_netstrfns[i].nf_type >= 0 && - !strcmp(libcfs_netstrfns[i].nf_name, name)) - return &libcfs_netstrfns[i]; - - return NULL; -} - -int -libcfs_isknown_lnd(int type) -{ - return libcfs_lnd2netstrfns(type) != NULL; -} - -char * -libcfs_lnd2modname(int lnd) -{ - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - - return (nf == NULL) ? 
NULL : nf->nf_modname; -} - -char * -libcfs_lnd2str(int lnd) -{ - char *str; - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - - if (nf != NULL) - return nf->nf_name; - - str = libcfs_next_nidstring(); - snprintf(str, LNET_NIDSTR_SIZE, "?%u?", lnd); - return str; -} - -int -libcfs_str2lnd(char *str) -{ - struct netstrfns *nf = libcfs_name2netstrfns(str); - - if (nf != NULL) - return nf->nf_type; - - return -1; -} - -char * -libcfs_net2str(__u32 net) -{ - int lnd = LNET_NETTYP(net); - int num = LNET_NETNUM(net); - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - char *str = libcfs_next_nidstring(); - - if (nf == NULL) - snprintf(str, LNET_NIDSTR_SIZE, "<%u:%u>", lnd, num); - else if (num == 0) - snprintf(str, LNET_NIDSTR_SIZE, "%s", nf->nf_name); - else - snprintf(str, LNET_NIDSTR_SIZE, "%s%u", nf->nf_name, num); - - return str; -} - -char * -libcfs_nid2str(lnet_nid_t nid) -{ - __u32 addr = LNET_NIDADDR(nid); - __u32 net = LNET_NIDNET(nid); - int lnd = LNET_NETTYP(net); - int nnum = LNET_NETNUM(net); - struct netstrfns *nf; - char *str; - int nob; - - if (nid == LNET_NID_ANY) - return "LNET_NID_ANY"; - - nf = libcfs_lnd2netstrfns(lnd); - str = libcfs_next_nidstring(); - - if (nf == NULL) - snprintf(str, LNET_NIDSTR_SIZE, "%x@<%u:%u>", addr, lnd, nnum); - else { - nf->nf_addr2str(addr, str); - nob = strlen(str); - if (nnum == 0) - snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s", - nf->nf_name); - else - snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s%u", - nf->nf_name, nnum); - } - - return str; -} - -static struct netstrfns * -libcfs_str2net_internal(char *str, __u32 *net) -{ - struct netstrfns *nf; - int nob; - int netnum; - int i; - - for (i = 0; i < libcfs_nnetstrfns; i++) { - nf = &libcfs_netstrfns[i]; - if (nf->nf_type >= 0 && - !strncmp(str, nf->nf_name, strlen(nf->nf_name))) - break; - } - - if (i == libcfs_nnetstrfns) - return NULL; - - nob = strlen(nf->nf_name); - - if (strlen(str) == (unsigned int)nob) { - netnum = 0; - } else { - if (nf->nf_type 
== LOLND) /* net number not allowed */ - return NULL; - - str += nob; - i = strlen(str); - if (sscanf(str, "%u%n", &netnum, &i) < 1 || - i != (int)strlen(str)) - return NULL; - } - - *net = LNET_MKNET(nf->nf_type, netnum); - return nf; -} - -__u32 -libcfs_str2net(char *str) -{ - __u32 net; - - if (libcfs_str2net_internal(str, &net) != NULL) - return net; - - return LNET_NIDNET(LNET_NID_ANY); -} - -lnet_nid_t -libcfs_str2nid(char *str) -{ - char *sep = strchr(str, '@'); - struct netstrfns *nf; - __u32 net; - __u32 addr; - - if (sep != NULL) { - nf = libcfs_str2net_internal(sep + 1, &net); - if (nf == NULL) - return LNET_NID_ANY; - } else { - sep = str + strlen(str); - net = LNET_MKNET(SOCKLND, 0); - nf = libcfs_lnd2netstrfns(SOCKLND); - LASSERT (nf != NULL); - } - - if (!nf->nf_str2addr(str, sep - str, &addr)) - return LNET_NID_ANY; - - return LNET_MKNID(net, addr); -} - -char * -libcfs_id2str(lnet_process_id_t id) -{ - char *str = libcfs_next_nidstring(); - - snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s", - ((id.pid & LNET_PID_USERFLAG) != 0) ? "U" : "", - (id.pid & ~LNET_PID_USERFLAG), libcfs_nid2str(id.nid)); - return str; -} - -int -libcfs_str2anynid(lnet_nid_t *nidp, char *str) -{ - if (!strcmp(str, "*")) { - *nidp = LNET_NID_ANY; - return 1; - } - - *nidp = libcfs_str2nid(str); - return *nidp != LNET_NID_ANY; -} - -#ifdef __KERNEL__ -void -libcfs_setnet0alias(int lnd) -{ - struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); - struct netstrfns *nf0 = &libcfs_netstrfns[libcfs_nnetstrfns - 1]; - - /* Ghastly hack to allow LNET to inter-operate with portals. 
- * NET type 0 becomes an alias for whatever local network we have, and - * this assignment here means we can parse and print its NIDs */ - - LASSERT (nf != NULL); - LASSERT (nf0->nf_type < 0); - - nf0->nf_name = "zero";//nf->nf_name; - nf0->nf_modname = nf->nf_modname; - nf0->nf_addr2str = nf->nf_addr2str; - nf0->nf_str2addr = nf->nf_str2addr; - mb(); - nf0->nf_type = 0; -} - -EXPORT_SYMBOL(libcfs_isknown_lnd); -EXPORT_SYMBOL(libcfs_lnd2modname); -EXPORT_SYMBOL(libcfs_lnd2str); -EXPORT_SYMBOL(libcfs_str2lnd); -EXPORT_SYMBOL(libcfs_net2str); -EXPORT_SYMBOL(libcfs_nid2str); -EXPORT_SYMBOL(libcfs_str2net); -EXPORT_SYMBOL(libcfs_str2nid); -EXPORT_SYMBOL(libcfs_id2str); -EXPORT_SYMBOL(libcfs_str2anynid); -EXPORT_SYMBOL(libcfs_setnet0alias); -#else /* __KERNEL__ */ -void -libcfs_setnet0alias(int lnd) -{ - LCONSOLE_ERROR("Liblustre cannot interoperate with old Portals.\n" - "portals_compatibility must be set to 'none'.\n"); -} -#endif diff --git a/lnet/libcfs/tracefile.c b/lnet/libcfs/tracefile.c deleted file mode 100644 index 0b8e61ee1200f29241470505bbb8c0161b3b2f24..0000000000000000000000000000000000000000 --- a/lnet/libcfs/tracefile.c +++ /dev/null @@ -1,951 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Zach Brown <zab@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE -#include "tracefile.h" - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> - -/* XXX move things up to the top, comment */ -union trace_data_union trace_data[NR_CPUS] __cacheline_aligned; - -char *tracefile = NULL; -int64_t tracefile_size = TRACEFILE_SIZE; -static struct tracefiled_ctl trace_tctl; -struct semaphore trace_thread_sem; -static int thread_running = 0; - -atomic_t tage_allocated = ATOMIC_INIT(0); - -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct trace_cpu_data *tcd); - -static inline struct trace_page *tage_from_list(struct list_head *list) -{ - return list_entry(list, struct trace_page, linkage); -} - -static struct trace_page *tage_alloc(int gfp) -{ - cfs_page_t *page; - struct trace_page *tage; - - /* - * Don't spam console with allocation failures: they will be reported - * by upper layer anyway. 
- */ - gfp |= CFS_ALLOC_NOWARN; - page = cfs_alloc_page(gfp); - if (page == NULL) - return NULL; - - tage = cfs_alloc(sizeof(*tage), gfp); - if (tage == NULL) { - cfs_free_page(page); - return NULL; - } - - tage->page = page; - atomic_inc(&tage_allocated); - return tage; -} - -static void tage_free(struct trace_page *tage) -{ - __LASSERT(tage != NULL); - __LASSERT(tage->page != NULL); - - cfs_free_page(tage->page); - cfs_free(tage); - atomic_dec(&tage_allocated); -} - -static void tage_to_tail(struct trace_page *tage, struct list_head *queue) -{ - __LASSERT(tage != NULL); - __LASSERT(queue != NULL); - - list_move_tail(&tage->linkage, queue); -} - -int trace_refill_stock(struct trace_cpu_data *tcd, int gfp, - struct list_head *stock) -{ - int i; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) { - struct trace_page *tage; - - tage = tage_alloc(gfp); - if (tage == NULL) - break; - list_add_tail(&tage->linkage, stock); - } - return i; -} - -/* return a page that has 'len' bytes left at the end */ -static struct trace_page *trace_get_tage_try(struct trace_cpu_data *tcd, - unsigned long len) -{ - struct trace_page *tage; - - if (tcd->tcd_cur_pages > 0) { - __LASSERT(!list_empty(&tcd->tcd_pages)); - tage = tage_from_list(tcd->tcd_pages.prev); - if (tage->used + len <= CFS_PAGE_SIZE) - return tage; - } - - if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { - if (tcd->tcd_cur_stock_pages > 0) { - tage = tage_from_list(tcd->tcd_stock_pages.prev); - -- tcd->tcd_cur_stock_pages; - list_del_init(&tage->linkage); - } else { - tage = tage_alloc(CFS_ALLOC_ATOMIC); - if (tage == NULL) { - printk(KERN_WARNING - "failure to allocate a tage (%ld)\n", - tcd->tcd_cur_pages); - return NULL; - } - } - - tage->used = 0; - tage->cpu = smp_processor_id(); - list_add_tail(&tage->linkage, &tcd->tcd_pages); - tcd->tcd_cur_pages++; - - if 
(tcd->tcd_cur_pages > 8 && thread_running) { - struct tracefiled_ctl *tctl = &trace_tctl; - /* - * wake up tracefiled to process some pages. - */ - cfs_waitq_signal(&tctl->tctl_waitq); - } - return tage; - } - return NULL; -} - -static void tcd_shrink(struct trace_cpu_data *tcd) -{ - int pgcount = tcd->tcd_cur_pages / 10; - struct page_collection pc; - struct trace_page *tage; - struct trace_page *tmp; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - printk(KERN_WARNING "debug daemon buffer overflowed; discarding" - " 10%% of pages (%d of %ld)\n", pgcount + 1, tcd->tcd_cur_pages); - - CFS_INIT_LIST_HEAD(&pc.pc_pages); - spin_lock_init(&pc.pc_lock); - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - if (pgcount-- == 0) - break; - - list_move_tail(&tage->linkage, &pc.pc_pages); - tcd->tcd_cur_pages--; - } - put_pages_on_tcd_daemon_list(&pc, tcd); -} - -/* return a page that has 'len' bytes left at the end */ -static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd, - unsigned long len) -{ - struct trace_page *tage; - - /* - * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) - * from here: this will lead to infinite recursion. - */ - - if (len > CFS_PAGE_SIZE) { - printk(KERN_ERR - "cowardly refusing to write %lu bytes in a page\n", len); - return NULL; - } - - tage = trace_get_tage_try(tcd, len); - if (tage != NULL) - return tage; - if (thread_running) - tcd_shrink(tcd); - if (tcd->tcd_cur_pages > 0) { - tage = tage_from_list(tcd->tcd_pages.next); - tage->used = 0; - tage_to_tail(tage, &tcd->tcd_pages); - } - return tage; -} - -int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask, - const char *file, const char *fn, const int line, - const char *format1, va_list args, - const char *format2, ...) 
-{ - struct trace_cpu_data *tcd = NULL; - struct ptldebug_header header; - struct trace_page *tage; - /* string_buf is used only if tcd != NULL, and is always set then */ - char *string_buf = NULL; - char *debug_buf; - int known_size; - int needed = 85; /* average message length */ - int max_nob; - va_list ap; - int depth; - int i; - int remain; - - if (strchr(file, '/')) - file = strrchr(file, '/') + 1; - - - set_ptldebug_header(&header, subsys, mask, line, CDEBUG_STACK()); - - tcd = trace_get_tcd(); - if (tcd == NULL) /* arch may not log in IRQ context */ - goto console; - - if (tcd->tcd_shutting_down) { - trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - depth = __current_nesting_level(); - known_size = strlen(file) + 1 + depth; - if (fn) - known_size += strlen(fn) + 1; - - if (libcfs_debug_binary) - known_size += sizeof(header); - - /*/ - * '2' used because vsnprintf return real size required for output - * _without_ terminating NULL. - * if needed is to small for this format. - */ - for (i=0;i<2;i++) { - tage = trace_get_tage(tcd, needed + known_size + 1); - if (tage == NULL) { - if (needed + known_size > CFS_PAGE_SIZE) - mask |= D_ERROR; - - trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - string_buf = (char *)cfs_page_address(tage->page)+tage->used+known_size; - - max_nob = CFS_PAGE_SIZE - tage->used - known_size; - if (max_nob <= 0) { - printk(KERN_EMERG "negative max_nob: %i\n", max_nob); - mask |= D_ERROR; - trace_put_tcd(tcd); - tcd = NULL; - goto console; - } - - needed = 0; - if (format1) { - va_copy(ap, args); - needed = vsnprintf(string_buf, max_nob, format1, ap); - va_end(ap); - } - - - if (format2) { - remain = max_nob - needed; - if (remain < 0) - remain = 0; - - va_start(ap, format2); - needed += vsnprintf(string_buf+needed, remain, format2, ap); - va_end(ap); - } - - if (needed < max_nob) /* well. printing ok.. 
*/ - break; - } - - if (*(string_buf+needed-1) != '\n') - printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", - file, line, fn); - - header.ph_len = known_size + needed; - debug_buf = (char *)cfs_page_address(tage->page) + tage->used; - - if (libcfs_debug_binary) { - memcpy(debug_buf, &header, sizeof(header)); - tage->used += sizeof(header); - debug_buf += sizeof(header); - } - - /* indent message according to the nesting level */ - while (depth-- > 0) { - *(debug_buf++) = '.'; - ++ tage->used; - } - - strcpy(debug_buf, file); - tage->used += strlen(file) + 1; - debug_buf += strlen(file) + 1; - - if (fn) { - strcpy(debug_buf, fn); - tage->used += strlen(fn) + 1; - debug_buf += strlen(fn) + 1; - } - - __LASSERT(debug_buf == string_buf); - - tage->used += needed; - __LASSERT (tage->used <= CFS_PAGE_SIZE); - -console: - if (!((mask & D_CANTMASK) != 0 || (mask & libcfs_printk) != 0)) { - /* no console output requested */ - if (tcd != NULL) - trace_put_tcd(tcd); - return 1; - } - - if (cdls != NULL) { - cfs_time_t t = cdls->cdls_next + - cfs_time_seconds(CDEBUG_MAX_LIMIT + 10); - cfs_duration_t dmax = cfs_time_seconds(CDEBUG_MAX_LIMIT); - - if (libcfs_console_ratelimit && - cdls->cdls_next != 0 && /* not first time ever */ - !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { - /* skipping a console message */ - cdls->cdls_count++; - if (tcd != NULL) - trace_put_tcd(tcd); - return 1; - } - - if (cfs_time_after(cfs_time_current(), t)) { - /* last timeout was a long time ago */ - cdls->cdls_delay /= 8; - } else { - cdls->cdls_delay *= 2; - - if (cdls->cdls_delay < CFS_TICK) - cdls->cdls_delay = CFS_TICK; - else if (cdls->cdls_delay > dmax) - cdls->cdls_delay = dmax; - } - - /* ensure cdls_next is never zero after it's been seen */ - cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; - } - - if (tcd != NULL) { - print_to_console(&header, mask, string_buf, needed, file, fn); - trace_put_tcd(tcd); - } else { - string_buf = 
trace_get_console_buffer(); - - needed = 0; - if (format1 != NULL) { - va_copy(ap, args); - needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap); - va_end(ap); - } - if (format2 != NULL) { - remain = TRACE_CONSOLE_BUFFER_SIZE - needed; - if (remain > 0) { - va_start(ap, format2); - needed += vsnprintf(string_buf+needed, remain, format2, ap); - va_end(ap); - } - } - print_to_console(&header, mask, - string_buf, needed, file, fn); - - trace_put_console_buffer(string_buf); - } - - if (cdls != NULL && cdls->cdls_count != 0) { - string_buf = trace_get_console_buffer(); - - needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, - "Skipped %d previous similar message%s\n", - cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); - - print_to_console(&header, mask, - string_buf, needed, file, fn); - - trace_put_console_buffer(string_buf); - cdls->cdls_count = 0; - } - - return 0; -} -EXPORT_SYMBOL(libcfs_debug_vmsg2); - -void -libcfs_assertion_failed(const char *expr, const char *file, - const char *func, const int line) -{ - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, - "ASSERTION(%s) failed\n", expr); - LBUG(); -} -EXPORT_SYMBOL(libcfs_assertion_failed); - -void -trace_assertion_failed(const char *str, - const char *fn, const char *file, int line) -{ - struct ptldebug_header hdr; - - libcfs_panic_in_progress = 1; - libcfs_catastrophe = 1; - mb(); - - set_ptldebug_header(&hdr, DEBUG_SUBSYSTEM, D_EMERG, line, - CDEBUG_STACK()); - - print_to_console(&hdr, D_EMERG, str, strlen(str), file, fn); - - LIBCFS_PANIC("Lustre debug assertion failure\n"); - - /* not reached */ -} - -static void -panic_collect_pages(struct page_collection *pc) -{ - /* Do the collect_pages job on a single CPU: assumes that all other - * CPUs have been stopped during a panic. If this isn't true for some - * arch, this will have to be implemented separately in each arch. 
*/ - int i; - struct trace_cpu_data *tcd; - - CFS_INIT_LIST_HEAD(&pc->pc_pages); - - for (i = 0; i < NR_CPUS; i++) { - tcd = &trace_data[i].tcd; - - list_splice(&tcd->tcd_pages, &pc->pc_pages); - CFS_INIT_LIST_HEAD(&tcd->tcd_pages); - tcd->tcd_cur_pages = 0; - - if (pc->pc_want_daemon_pages) { - list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages); - CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages); - tcd->tcd_cur_daemon_pages = 0; - } - } -} - -static void collect_pages_on_cpu(void *info) -{ - struct trace_cpu_data *tcd; - struct page_collection *pc = info; - - tcd = trace_get_tcd(); - __LASSERT (tcd != NULL); - - spin_lock(&pc->pc_lock); - list_splice(&tcd->tcd_pages, &pc->pc_pages); - CFS_INIT_LIST_HEAD(&tcd->tcd_pages); - tcd->tcd_cur_pages = 0; - if (pc->pc_want_daemon_pages) { - list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages); - CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages); - tcd->tcd_cur_daemon_pages = 0; - } - spin_unlock(&pc->pc_lock); - - trace_put_tcd(tcd); -} - -static void collect_pages(struct page_collection *pc) -{ - CFS_INIT_LIST_HEAD(&pc->pc_pages); - - if (libcfs_panic_in_progress) - panic_collect_pages(pc); - else - trace_call_on_all_cpus(collect_pages_on_cpu, pc); -} - -static void put_pages_back_on_cpu(void *info) -{ - struct page_collection *pc = info; - struct trace_cpu_data *tcd; - struct list_head *cur_head; - struct trace_page *tage; - struct trace_page *tmp; - - tcd = trace_get_tcd(); - __LASSERT (tcd != NULL); - - cur_head = tcd->tcd_pages.next; - - spin_lock(&pc->pc_lock); - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != smp_processor_id()) - continue; - - tage_to_tail(tage, cur_head); - tcd->tcd_cur_pages++; - } - spin_unlock(&pc->pc_lock); - - trace_put_tcd(tcd); -} - -static void put_pages_back(struct page_collection *pc) -{ - if (!libcfs_panic_in_progress) - trace_call_on_all_cpus(put_pages_back_on_cpu, pc); -} - -/* Add pages to a per-cpu debug daemon ringbuffer. 
This buffer makes sure that - * we have a good amount of data at all times for dumping during an LBUG, even - * if we have been steadily writing (and otherwise discarding) pages via the - * debug daemon. */ -static void put_pages_on_tcd_daemon_list(struct page_collection *pc, - struct trace_cpu_data *tcd) -{ - struct trace_page *tage; - struct trace_page *tmp; - - spin_lock(&pc->pc_lock); - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - - __LASSERT_TAGE_INVARIANT(tage); - - if (tage->cpu != smp_processor_id()) - continue; - - tage_to_tail(tage, &tcd->tcd_daemon_pages); - tcd->tcd_cur_daemon_pages++; - - if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { - struct trace_page *victim; - - __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); - victim = tage_from_list(tcd->tcd_daemon_pages.next); - - __LASSERT_TAGE_INVARIANT(victim); - - list_del(&victim->linkage); - tage_free(victim); - tcd->tcd_cur_daemon_pages--; - } - } - spin_unlock(&pc->pc_lock); -} - -static void put_pages_on_daemon_list_on_cpu(void *info) -{ - struct trace_cpu_data *tcd; - - tcd = trace_get_tcd(); - __LASSERT (tcd != NULL); - - put_pages_on_tcd_daemon_list(info, tcd); - - trace_put_tcd(tcd); -} - -static void put_pages_on_daemon_list(struct page_collection *pc) -{ - trace_call_on_all_cpus(put_pages_on_daemon_list_on_cpu, pc); -} - -void trace_debug_print(void) -{ - struct page_collection pc; - struct trace_page *tage; - struct trace_page *tmp; - - spin_lock_init(&pc.pc_lock); - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - char *p, *file, *fn; - cfs_page_t *page; - - __LASSERT_TAGE_INVARIANT(tage); - - page = tage->page; - p = cfs_page_address(page); - while (p < ((char *)cfs_page_address(page) + tage->used)) { - struct ptldebug_header *hdr; - int len; - hdr = (void *)p; - p += sizeof(*hdr); - file = p; - p += strlen(file) + 1; - fn = p; - p += strlen(fn) + 1; - len = hdr->ph_len - (p - (char *)hdr); - - 
print_to_console(hdr, D_EMERG, p, len, file, fn); - - p += len; - } - - list_del(&tage->linkage); - tage_free(tage); - } -} - -int tracefile_dump_all_pages(char *filename) -{ - struct page_collection pc; - cfs_file_t *filp; - struct trace_page *tage; - struct trace_page *tmp; - int rc; - - CFS_DECL_MMSPACE; - - tracefile_write_lock(); - - filp = cfs_filp_open(filename, - O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc); - if (!filp) { - printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n", - filename, rc); - goto out; - } - - spin_lock_init(&pc.pc_lock); - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) { - rc = 0; - goto close; - } - - /* ok, for now, just write the pages. in the future we'll be building - * iobufs with the pages and calling generic_direct_IO */ - CFS_MMSPACE_OPEN; - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - - __LASSERT_TAGE_INVARIANT(tage); - - rc = cfs_filp_write(filp, cfs_page_address(tage->page), - tage->used, cfs_filp_poff(filp)); - if (rc != (int)tage->used) { - printk(KERN_WARNING "wanted to write %u but wrote " - "%d\n", tage->used, rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - break; - } - list_del(&tage->linkage); - tage_free(tage); - } - CFS_MMSPACE_CLOSE; - rc = cfs_filp_fsync(filp); - if (rc) - printk(KERN_ERR "sync returns %d\n", rc); - close: - cfs_filp_close(filp); - out: - tracefile_write_unlock(); - return rc; -} - -void trace_flush_pages(void) -{ - struct page_collection pc; - struct trace_page *tage; - struct trace_page *tmp; - - spin_lock_init(&pc.pc_lock); - - pc.pc_want_daemon_pages = 1; - collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - tage_free(tage); - } -} - -int trace_dk(struct file *file, const char *buffer, unsigned long count, - void *data) -{ - char *name; - unsigned long off; - int rc; - - name = cfs_alloc(count + 1, 
CFS_ALLOC_STD); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user((void *)name, (void *)buffer, count)) { - rc = -EFAULT; - goto out; - } - -#if !defined(__WINNT__) - if (name[0] != '/') { - rc = -EINVAL; - goto out; - } -#endif - - /* be nice and strip out trailing '\n' */ - for (off = count ; off > 2 && isspace(name[off - 1]); off--) - ; - - name[off] = '\0'; - rc = tracefile_dump_all_pages(name); -out: - if (name) - cfs_free(name); - return count; -} -EXPORT_SYMBOL(trace_dk); - -static int tracefiled(void *arg) -{ - struct page_collection pc; - struct tracefiled_ctl *tctl = arg; - struct trace_page *tage; - struct trace_page *tmp; - struct ptldebug_header *hdr; - cfs_file_t *filp; - int rc; - - CFS_DECL_MMSPACE; - - /* we're started late enough that we pick up init's fs context */ - /* this is so broken in uml? what on earth is going on? */ - cfs_daemonize("ktracefiled"); - - spin_lock_init(&pc.pc_lock); - complete(&tctl->tctl_start); - - while (1) { - cfs_waitlink_t __wait; - - cfs_waitlink_init(&__wait); - cfs_waitq_add(&tctl->tctl_waitq, &__wait); - set_current_state(TASK_INTERRUPTIBLE); - cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE, - cfs_time_seconds(1)); - cfs_waitq_del(&tctl->tctl_waitq, &__wait); - - if (atomic_read(&tctl->tctl_shutdown)) - break; - - pc.pc_want_daemon_pages = 0; - collect_pages(&pc); - if (list_empty(&pc.pc_pages)) - continue; - - filp = NULL; - tracefile_read_lock(); - if (tracefile != NULL) { - filp = cfs_filp_open(tracefile, - O_CREAT | O_RDWR | O_LARGEFILE, - 0600, &rc); - if (!(filp)) - printk("couldn't open %s: %d\n", tracefile, rc); - } - tracefile_read_unlock(); - if (filp == NULL) { - put_pages_on_daemon_list(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - continue; - } - - CFS_MMSPACE_OPEN; - - /* mark the first header, so we can sort in chunks */ - tage = tage_from_list(pc.pc_pages.next); - __LASSERT_TAGE_INVARIANT(tage); - - hdr = cfs_page_address(tage->page); - hdr->ph_flags |= PH_FLAG_FIRST_RECORD; - - 
list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - static loff_t f_pos; - - __LASSERT_TAGE_INVARIANT(tage); - - if (f_pos >= (off_t)tracefile_size) - f_pos = 0; - else if (f_pos > cfs_filp_size(filp)) - f_pos = cfs_filp_size(filp); - - rc = cfs_filp_write(filp, cfs_page_address(tage->page), - tage->used, &f_pos); - if (rc != (int)tage->used) { - printk(KERN_WARNING "wanted to write %u but " - "wrote %d\n", tage->used, rc); - put_pages_back(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - } - } - CFS_MMSPACE_CLOSE; - - cfs_filp_close(filp); - put_pages_on_daemon_list(&pc); - __LASSERT(list_empty(&pc.pc_pages)); - } - complete(&tctl->tctl_stop); - return 0; -} - -int trace_start_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - int rc = 0; - - mutex_down(&trace_thread_sem); - if (thread_running) - goto out; - - init_completion(&tctl->tctl_start); - init_completion(&tctl->tctl_stop); - cfs_waitq_init(&tctl->tctl_waitq); - atomic_set(&tctl->tctl_shutdown, 0); - - if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) { - rc = -ECHILD; - goto out; - } - - wait_for_completion(&tctl->tctl_start); - thread_running = 1; -out: - mutex_up(&trace_thread_sem); - return rc; -} - -void trace_stop_thread(void) -{ - struct tracefiled_ctl *tctl = &trace_tctl; - - mutex_down(&trace_thread_sem); - if (thread_running) { - printk(KERN_INFO "Shutting down debug daemon thread...\n"); - atomic_set(&tctl->tctl_shutdown, 1); - wait_for_completion(&tctl->tctl_stop); - thread_running = 0; - } - mutex_up(&trace_thread_sem); -} - -int tracefile_init(void) -{ - struct trace_cpu_data *tcd; - int i; - int rc; - - rc = tracefile_init_arch(); - if (rc != 0) - return rc; - - for (i = 0; i < NR_CPUS; i++) { - tcd = &trace_data[i].tcd; - CFS_INIT_LIST_HEAD(&tcd->tcd_pages); - CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages); - CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages); - tcd->tcd_cur_pages = 0; - tcd->tcd_cur_stock_pages = 0; - tcd->tcd_cur_daemon_pages = 0; - tcd->tcd_max_pages = 
TCD_MAX_PAGES; - tcd->tcd_shutting_down = 0; - tcd->tcd_cpu = i; - } - - return 0; -} - -static void trace_cleanup_on_cpu(void *info) -{ - struct trace_cpu_data *tcd; - struct trace_page *tage; - struct trace_page *tmp; - - tcd = trace_get_tcd(); - __LASSERT (tcd != NULL); - - tcd->tcd_shutting_down = 1; - - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { - __LASSERT_TAGE_INVARIANT(tage); - - list_del(&tage->linkage); - tage_free(tage); - } - tcd->tcd_cur_pages = 0; - - trace_put_tcd(tcd); -} - -static void trace_cleanup(void) -{ - struct page_collection pc; - - CFS_INIT_LIST_HEAD(&pc.pc_pages); - spin_lock_init(&pc.pc_lock); - - trace_call_on_all_cpus(trace_cleanup_on_cpu, &pc); - - tracefile_fini_arch(); -} - -void tracefile_exit(void) -{ - trace_stop_thread(); - trace_cleanup(); -} diff --git a/lnet/libcfs/tracefile.h b/lnet/libcfs/tracefile.h deleted file mode 100644 index f3568e93b29e298af0a25d539cd28974669c21d3..0000000000000000000000000000000000000000 --- a/lnet/libcfs/tracefile.h +++ /dev/null @@ -1,210 +0,0 @@ -#ifndef __LIBCFS_TRACEFILE_H__ -#define __LIBCFS_TRACEFILE_H__ - -#include <libcfs/libcfs.h> - -/* trace file lock routines */ - -int tracefile_init_arch(void); -void tracefile_fini_arch(void); - -void tracefile_read_lock(void); -void tracefile_read_unlock(void); -void tracefile_write_lock(void); -void tracefile_write_unlock(void); - -int tracefile_dump_all_pages(char *filename); -void trace_debug_print(void); -void trace_flush_pages(void); -int trace_start_thread(void); -void trace_stop_thread(void); -int tracefile_init(void); -void tracefile_exit(void); -int trace_write_daemon_file(struct file *file, const char *buffer, - unsigned long count, void *data); -int trace_read_daemon_file(char *page, char **start, off_t off, int count, - int *eof, void *data); -int trace_write_debug_mb(struct file *file, const char *buffer, - unsigned long count, void *data); -int trace_read_debug_mb(char *page, char **start, off_t off, int count, - int 
*eof, void *data); -int trace_dk(struct file *file, const char *buffer, unsigned long count, - void *data); - -extern void libcfs_debug_dumplog_internal(void *arg); -extern void libcfs_register_panic_notifier(void); -extern void libcfs_unregister_panic_notifier(void); -extern int libcfs_panic_in_progress; - -#ifdef LUSTRE_TRACEFILE_PRIVATE -/* - * Private declare for tracefile - */ -#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT)) -#define TCD_STOCK_PAGES (TCD_MAX_PAGES) - -#define TRACEFILE_SIZE (500 << 20) - -/* Size of a buffer for sprinting console messages to in IRQ context (no - * logging in IRQ context) */ -#define TRACE_CONSOLE_BUFFER_SIZE 1024 - -union trace_data_union { - struct trace_cpu_data { - /* - * pages with trace records not yet processed by tracefiled. - */ - struct list_head tcd_pages; - /* number of pages on ->tcd_pages */ - unsigned long tcd_cur_pages; - - /* - * pages with trace records already processed by - * tracefiled. These pages are kept in memory, so that some - * portion of log can be written in the event of LBUG. This - * list is maintained in LRU order. - * - * Pages are moved to ->tcd_daemon_pages by tracefiled() - * (put_pages_on_daemon_list()). LRU pages from this list are - * discarded when list grows too large. - */ - struct list_head tcd_daemon_pages; - /* number of pages on ->tcd_daemon_pages */ - unsigned long tcd_cur_daemon_pages; - - /* - * Maximal number of pages allowed on ->tcd_pages and - * ->tcd_daemon_pages each. Always TCD_MAX_PAGES in current - * implementation. - */ - unsigned long tcd_max_pages; - - /* - * preallocated pages to write trace records into. Pages from - * ->tcd_stock_pages are moved to ->tcd_pages by - * portals_debug_msg(). - * - * This list is necessary, because on some platforms it's - * impossible to perform efficient atomic page allocation in a - * non-blockable context. - * - * Such platforms fill ->tcd_stock_pages "on occasion", when - * tracing code is entered in blockable context. 
- * - * trace_get_tage_try() tries to get a page from - * ->tcd_stock_pages first and resorts to atomic page - * allocation only if this queue is empty. ->tcd_stock_pages - * is replenished when tracing code is entered in blocking - * context (darwin-tracefile.c:trace_get_tcd()). We try to - * maintain TCD_STOCK_PAGES (40 by default) pages in this - * queue. Atomic allocation is only required if more than - * TCD_STOCK_PAGES pagesful are consumed by trace records all - * emitted in non-blocking contexts. Which is quite unlikely. - */ - struct list_head tcd_stock_pages; - /* number of pages on ->tcd_stock_pages */ - unsigned long tcd_cur_stock_pages; - - int tcd_shutting_down; - int tcd_cpu; - } tcd; - char __pad[SMP_CACHE_BYTES]; -}; - -/* XXX nikita: this declaration is internal to tracefile.c and should probably - * be moved there */ -struct page_collection { - struct list_head pc_pages; - /* - * spin-lock protecting ->pc_pages. It is taken by smp_call_function() - * call-back functions. XXX nikita: Which is horrible: all processors - * receive NMI at the same time only to be serialized by this - * lock. Probably ->pc_pages should be replaced with an array of - * NR_CPUS elements accessed locklessly. - */ - spinlock_t pc_lock; - /* - * if this flag is set, collect_pages() will spill both - * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, - * only ->tcd_pages are spilled. - */ - int pc_want_daemon_pages; -}; - -/* XXX nikita: this declaration is internal to tracefile.c and should probably - * be moved there */ -struct tracefiled_ctl { - struct completion tctl_start; - struct completion tctl_stop; - cfs_waitq_t tctl_waitq; - pid_t tctl_pid; - atomic_t tctl_shutdown; -}; - -/* - * small data-structure for each page owned by tracefiled. 
- */ -/* XXX nikita: this declaration is internal to tracefile.c and should probably - * be moved there */ -struct trace_page { - /* - * page itself - */ - cfs_page_t *page; - /* - * linkage into one of the lists in trace_data_union or - * page_collection - */ - struct list_head linkage; - /* - * number of bytes used within this page - */ - unsigned int used; - /* - * cpu that owns this page - */ - int cpu; -}; - -extern void set_ptldebug_header(struct ptldebug_header *header, - int subsys, int mask, const int line, - unsigned long stack); -extern void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn); - -extern struct trace_cpu_data *trace_get_tcd(void); -extern void trace_put_tcd(struct trace_cpu_data *tcd); -extern char *trace_get_console_buffer(void); -extern void trace_put_console_buffer(char *buffer); - -extern void trace_call_on_all_cpus(void (*fn)(void *arg), void *arg); - -int trace_refill_stock(struct trace_cpu_data *tcd, int gfp, - struct list_head *stock); - - -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage); - -extern void trace_assertion_failed(const char *str, const char *fn, - const char *file, int line); - -/* ASSERTION that is safe to use within the debug system */ -#define __LASSERT(cond) \ -({ \ - if (unlikely(!(cond))) { \ - trace_assertion_failed("ASSERTION("#cond") failed", \ - __FUNCTION__, __FILE__, __LINE__); \ - } \ -}) - -#define __LASSERT_TAGE_INVARIANT(tage) \ -({ \ - __LASSERT(tage != NULL); \ - __LASSERT(tage->page != NULL); \ - __LASSERT(tage->used <= CFS_PAGE_SIZE); \ - __LASSERT(cfs_page_count(tage->page) > 0); \ -}) - -#endif /* LUSTRE_TRACEFILE_PRIVATE */ - -#endif /* __LIBCFS_TRACEFILE_H__ */ diff --git a/lnet/libcfs/user-lock.c b/lnet/libcfs/user-lock.c deleted file mode 100644 index a1a6779b71ab955150b0d8f0a0502ecfb96b5911..0000000000000000000000000000000000000000 --- a/lnet/libcfs/user-lock.c +++ /dev/null @@ -1,237 +0,0 @@ -/* -*- 
mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable time API for user-level. - * - */ - -/* Implementations of portable synchronization APIs for liblustre */ - -/* - * liblustre is single-threaded, so most "synchronization" APIs are trivial. - * - * XXX Liang: There are several branches share lnet with b_hd_newconfig, - * if we define lock APIs at here, there will be conflict with liblustre - * in other branches. - */ - -#ifndef __KERNEL__ - -#include <stdlib.h> -#include <libcfs/libcfs.h> -/* - * Optional debugging (magic stamping and checking ownership) can be added. - */ - -#if 0 -/* - * spin_lock - * - * - spin_lock_init(x) - * - spin_lock(x) - * - spin_unlock(x) - * - spin_trylock(x) - * - * - spin_lock_irqsave(x, f) - * - spin_unlock_irqrestore(x, f) - * - * No-op implementation. 
- */ - -void spin_lock_init(spinlock_t *lock) -{ - LASSERT(lock != NULL); - (void)lock; -} - -void spin_lock(spinlock_t *lock) -{ - (void)lock; -} - -void spin_unlock(spinlock_t *lock) -{ - (void)lock; -} - -int spin_trylock(spinlock_t *lock) -{ - (void)lock; - return 1; -} - -void spin_lock_bh_init(spinlock_t *lock) -{ - LASSERT(lock != NULL); - (void)lock; -} - -void spin_lock_bh(spinlock_t *lock) -{ - LASSERT(lock != NULL); - (void)lock; -} - -void spin_unlock_bh(spinlock_t *lock) -{ - LASSERT(lock != NULL); - (void)lock; -} - -/* - * Semaphore - * - * - sema_init(x, v) - * - __down(x) - * - __up(x) - */ -struct semaphore {}; - -void sema_init(struct semaphore *s, int val) -{ - LASSERT(s != NULL); - (void)s; - (void)val; -} - -void __down(struct semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -void __up(struct semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -/* - * Mutex: - * - * - init_mutex(x) - * - init_mutex_locked(x) - * - mutex_up(x) - * - mutex_down(x) - */ - -#define mutex_up(s) __up(s) -#define mutex_down(s) __down(s) - -#define init_mutex(x) sema_init(x, 1) -#define init_mutex_locked(x) sema_init(x, 0) - -/* - * Completion: - * - * - init_completion(c) - * - complete(c) - * - wait_for_completion(c) - */ -struct completion {}; - -void init_completion(struct completion *c) -{ - LASSERT(c != NULL); - (void)c; -} - -void complete(struct completion *c) -{ - LASSERT(c != NULL); - (void)c; -} - -void wait_for_completion(struct completion *c) -{ - LASSERT(c != NULL); - (void)c; -} - -/* - * rw_semaphore: - * - * - DECLARE_RWSEM(x) - * - init_rwsem(x) - * - down_read(x) - * - up_read(x) - * - down_write(x) - * - up_write(x) - */ -struct rw_semaphore {}; - -void init_rwsem(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -void down_read(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -int down_read_trylock(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; - return 1; -} - -void down_write(struct 
rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -int down_write_trylock(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; - return 1; -} - -void up_read(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} - -void up_write(struct rw_semaphore *s) -{ - LASSERT(s != NULL); - (void)s; -} -#endif - -/* !__KERNEL__ */ -#endif - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/user-prim.c b/lnet/libcfs/user-prim.c deleted file mode 100644 index 8d968a02e8bf3b5e5f207dffc61f7eaa87757e47..0000000000000000000000000000000000000000 --- a/lnet/libcfs/user-prim.c +++ /dev/null @@ -1,360 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Nikita Danilov <nikita@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under the - * terms of version 2 of the GNU General Public License as published by the - * Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass - * Ave, Cambridge, MA 02139, USA. - * - * Implementation of portable APIs for user-level. - * - */ - -/* Implementations of portable APIs for liblustre */ - -/* - * liblustre is single-threaded, so most "synchronization" APIs are trivial. 
- */ - -#ifndef __KERNEL__ - -#include <sys/mman.h> -#ifndef __CYGWIN__ -#include <stdint.h> -#ifdef HAVE_ASM_PAGE_H -#include <asm/page.h> -#endif -#ifdef HAVE_SYS_USER_H -#include <sys/user.h> -#endif -#else -#include <sys/types.h> -#endif -#include <stdlib.h> -#include <string.h> -#include <signal.h> -#include <errno.h> -#include <sys/stat.h> -#include <sys/vfs.h> - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -/* - * Sleep channel. No-op implementation. - */ - -void cfs_waitq_init(struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - (void)waitq; -} - -void cfs_waitlink_init(struct cfs_waitlink *link) -{ - LASSERT(link != NULL); - (void)link; -} - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - (void)waitq; - (void)link; -} - -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - (void)waitq; - (void)link; -} - -void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - (void)waitq; - (void)link; -} - -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - (void)waitq; - (void)link; -} - -int cfs_waitq_active(struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - (void)waitq; - return 0; -} - -void cfs_waitq_signal(struct cfs_waitq *waitq) -{ - LASSERT(waitq != NULL); - (void)waitq; -} - -void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr) -{ - LASSERT(waitq != NULL); - (void)waitq; -} - -void cfs_waitq_broadcast(struct cfs_waitq *waitq, int state) -{ - LASSERT(waitq != NULL); - (void)waitq; -} - -void cfs_waitq_wait(struct cfs_waitlink *link) -{ - LASSERT(link != NULL); - (void)link; -} - -int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout) -{ - LASSERT(link != NULL); - (void)link; - return 0; -} - -/* - * 
Allocator - */ - -cfs_page_t *cfs_alloc_page(unsigned int flags) -{ - cfs_page_t *pg = malloc(sizeof(*pg)); - - if (!pg) - return NULL; - pg->addr = malloc(CFS_PAGE_SIZE); - - if (!pg->addr) { - free(pg); - return NULL; - } - return pg; -} - -void cfs_free_page(cfs_page_t *pg) -{ - free(pg->addr); - free(pg); -} - -void *cfs_page_address(cfs_page_t *pg) -{ - return pg->addr; -} - -void *cfs_kmap(cfs_page_t *pg) -{ - return pg->addr; -} - -void cfs_kunmap(cfs_page_t *pg) -{ -} - -/* - * SLAB allocator - */ - -cfs_mem_cache_t * -cfs_mem_cache_create(const char *name, size_t objsize, size_t off, unsigned long flags) -{ - cfs_mem_cache_t *c; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->size = objsize; - CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n", - name, c, (int)objsize); - return c; -} - -int cfs_mem_cache_destroy(cfs_mem_cache_t *c) -{ - CDEBUG(D_MALLOC, "destroy slab cache %p, objsize %u\n", c, c->size); - free(c); - return 0; -} - -void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp) -{ - return cfs_alloc(c->size, gfp); -} - -void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr) -{ - cfs_free(addr); -} - -/* - * This uses user-visible declarations from <linux/kdev_t.h> - */ -#ifdef __LINUX__ -#include <linux/kdev_t.h> -#endif - -#ifndef MKDEV - -#define MAJOR(dev) ((dev)>>8) -#define MINOR(dev) ((dev) & 0xff) -#define MKDEV(ma,mi) ((ma)<<8 | (mi)) - -#endif - -cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor) -{ - return MKDEV(major, minor); -} - -cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev) -{ - return MAJOR(rdev); -} - -cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev) -{ - return MINOR(rdev); -} - -void cfs_enter_debugger(void) -{ - /* - * nothing for now. 
- */ -} - -void cfs_daemonize(char *str) -{ - return; -} - -cfs_sigset_t cfs_block_allsigs(void) -{ - cfs_sigset_t all; - cfs_sigset_t old; - int rc; - - sigfillset(&all); - rc = sigprocmask(SIG_SETMASK, &all, &old); - LASSERT(rc == 0); - - return old; -} - -cfs_sigset_t cfs_block_sigs(cfs_sigset_t blocks) -{ - cfs_sigset_t old; - int rc; - - rc = sigprocmask(SIG_SETMASK, &blocks, &old); - LASSERT (rc == 0); - - return old; -} - -void cfs_restore_sigs(cfs_sigset_t old) -{ - int rc = sigprocmask(SIG_SETMASK, &old, NULL); - - LASSERT (rc == 0); -} - -int cfs_signal_pending(void) -{ - cfs_sigset_t empty; - cfs_sigset_t set; - int rc; - - rc = sigpending(&set); - LASSERT (rc == 0); - - sigemptyset(&empty); - - return !memcmp(&empty, &set, sizeof(set)); -} - -void cfs_clear_sigpending(void) -{ - return; -} - -#ifdef __LINUX__ - -/* - * In glibc (NOT in Linux, so check above is not right), implement - * stack-back-tracing through backtrace() function. - */ -#include <execinfo.h> - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{ - backtrace(trace->frame, sizeof_array(trace->frame)); -} - -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - if (0 <= frame_no && frame_no < sizeof_array(trace->frame)) - return trace->frame[frame_no]; - else - return NULL; -} - -#else - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{} -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - return NULL; -} - -/* __LINUX__ */ -#endif - -void lbug_with_loc(char *file, const char *func, const int line) -{ - /* No libcfs_catastrophe in userspace! 
*/ - libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n"); - abort(); -} - - -/* !__KERNEL__ */ -#endif - -/* - * Local variables: - * c-indentation-style: "K&R" - * c-basic-offset: 8 - * tab-width: 8 - * fill-column: 80 - * scroll-step: 1 - * End: - */ diff --git a/lnet/libcfs/watchdog.c b/lnet/libcfs/watchdog.c deleted file mode 100644 index 22754af525b15ddb6c20a66764a54a00f756fb8b..0000000000000000000000000000000000000000 --- a/lnet/libcfs/watchdog.c +++ /dev/null @@ -1,414 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * Author: Jacob Berkman <jacob@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include "tracefile.h" - -struct lc_watchdog { - cfs_timer_t lcw_timer; /* kernel timer */ - struct list_head lcw_list; - struct timeval lcw_last_touched; - cfs_task_t *lcw_task; - - void (*lcw_callback)(pid_t, void *); - void *lcw_data; - - pid_t lcw_pid; - cfs_duration_t lcw_time; /* time until watchdog fires, jiffies */ - - enum { - LC_WATCHDOG_DISABLED, - LC_WATCHDOG_ENABLED, - LC_WATCHDOG_EXPIRED - } lcw_state; -}; - -#ifdef WITH_WATCHDOG -/* - * The dispatcher will complete lcw_start_completion when it starts, - * and lcw_stop_completion when it exits. - * Wake lcw_event_waitq to signal timer callback dispatches. - */ -static struct completion lcw_start_completion; -static struct completion lcw_stop_completion; -static wait_queue_head_t lcw_event_waitq; - -/* - * Set this and wake lcw_event_waitq to stop the dispatcher. - */ -enum { - LCW_FLAG_STOP = 0 -}; -static unsigned long lcw_flags = 0; - -/* - * Number of outstanding watchdogs. - * When it hits 1, we start the dispatcher. - * When it hits 0, we stop the distpatcher. - */ -static __u32 lcw_refcount = 0; -static DECLARE_MUTEX(lcw_refcount_sem); - -/* - * List of timers that have fired that need their callbacks run by the - * dispatcher. - */ -static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; /* BH lock! 
*/ -static struct list_head lcw_pending_timers = \ - LIST_HEAD_INIT(lcw_pending_timers); - -#ifdef HAVE_TASKLIST_LOCK -static void -lcw_dump(struct lc_watchdog *lcw) -{ - cfs_task_t *tsk; - ENTRY; - - read_lock(&tasklist_lock); - tsk = find_task_by_pid(lcw->lcw_pid); - - if (tsk == NULL) { - CWARN("Process %d was not found in the task list; " - "watchdog callback may be incomplete\n", (int)lcw->lcw_pid); - } else if (tsk != lcw->lcw_task) { - CWARN("The current process %d did not set the watchdog; " - "watchdog callback may be incomplete\n", (int)lcw->lcw_pid); - } else { - libcfs_debug_dumpstack(tsk); - } - - read_unlock(&tasklist_lock); - EXIT; -} -#else -static void -lcw_dump(struct lc_watchdog *lcw) -{ - CERROR("unable to dump stack because of missing export\n"); -} -#endif - -static void lcw_cb(unsigned long data) -{ - struct lc_watchdog *lcw = (struct lc_watchdog *)data; - - ENTRY; - - if (lcw->lcw_state != LC_WATCHDOG_ENABLED) { - EXIT; - return; - } - - lcw->lcw_state = LC_WATCHDOG_EXPIRED; - - /* NB this warning should appear on the console, but may not get into - * the logs since we're running in a softirq handler */ - - CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n", - (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time)); - lcw_dump(lcw); - - spin_lock_bh(&lcw_pending_timers_lock); - - if (list_empty(&lcw->lcw_list)) { - list_add(&lcw->lcw_list, &lcw_pending_timers); - wake_up(&lcw_event_waitq); - } - - spin_unlock_bh(&lcw_pending_timers_lock); - - EXIT; -} - -static int is_watchdog_fired(void) -{ - int rc; - - if (test_bit(LCW_FLAG_STOP, &lcw_flags)) - return 1; - - spin_lock_bh(&lcw_pending_timers_lock); - rc = !list_empty(&lcw_pending_timers); - spin_unlock_bh(&lcw_pending_timers_lock); - return rc; -} - -static int lcw_dispatch_main(void *data) -{ - int rc = 0; - unsigned long flags; - struct lc_watchdog *lcw; - - ENTRY; - - cfs_daemonize("lc_watchdogd"); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - 
RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - - complete(&lcw_start_completion); - - while (1) { - wait_event_interruptible(lcw_event_waitq, is_watchdog_fired()); - CDEBUG(D_INFO, "Watchdog got woken up...\n"); - if (test_bit(LCW_FLAG_STOP, &lcw_flags)) { - CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n"); - - spin_lock_bh(&lcw_pending_timers_lock); - rc = !list_empty(&lcw_pending_timers); - spin_unlock_bh(&lcw_pending_timers_lock); - if (rc) { - CERROR("pending timers list was not empty at " - "time of watchdog dispatch shutdown\n"); - } - break; - } - - spin_lock_bh(&lcw_pending_timers_lock); - while (!list_empty(&lcw_pending_timers)) { - - lcw = list_entry(lcw_pending_timers.next, - struct lc_watchdog, - lcw_list); - list_del_init(&lcw->lcw_list); - spin_unlock_bh(&lcw_pending_timers_lock); - - CDEBUG(D_INFO, "found lcw for pid %d: inactive for " - "%lds\n", (int)lcw->lcw_pid, - cfs_duration_sec(lcw->lcw_time)); - - if (lcw->lcw_state != LC_WATCHDOG_DISABLED) - lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data); - - spin_lock_bh(&lcw_pending_timers_lock); - } - spin_unlock_bh(&lcw_pending_timers_lock); - } - - complete(&lcw_stop_completion); - - RETURN(rc); -} - -static void lcw_dispatch_start(void) -{ - int rc; - - ENTRY; - LASSERT(lcw_refcount == 1); - - init_completion(&lcw_stop_completion); - init_completion(&lcw_start_completion); - init_waitqueue_head(&lcw_event_waitq); - - CDEBUG(D_INFO, "starting dispatch thread\n"); - rc = kernel_thread(lcw_dispatch_main, NULL, 0); - if (rc < 0) { - CERROR("error spawning watchdog dispatch thread: %d\n", rc); - EXIT; - return; - } - wait_for_completion(&lcw_start_completion); - CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n"); - - EXIT; -} - -static void lcw_dispatch_stop(void) -{ - ENTRY; - LASSERT(lcw_refcount == 0); - - CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); - - set_bit(LCW_FLAG_STOP, &lcw_flags); - wake_up(&lcw_event_waitq); - - 
wait_for_completion(&lcw_stop_completion); - - CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n"); - - EXIT; -} - -struct lc_watchdog *lc_watchdog_add(int timeout_ms, - void (*callback)(pid_t, void *), - void *data) -{ - struct lc_watchdog *lcw = NULL; - ENTRY; - - LIBCFS_ALLOC(lcw, sizeof(*lcw)); - if (lcw == NULL) { - CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n"); - RETURN(ERR_PTR(-ENOMEM)); - } - - lcw->lcw_task = cfs_current(); - lcw->lcw_pid = cfs_curproc_pid(); - lcw->lcw_time = cfs_time_seconds(timeout_ms) / 1000; - lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog; - lcw->lcw_data = data; - lcw->lcw_state = LC_WATCHDOG_DISABLED; - - INIT_LIST_HEAD(&lcw->lcw_list); - - lcw->lcw_timer.function = lcw_cb; - lcw->lcw_timer.data = (unsigned long)lcw; - lcw->lcw_timer.expires = jiffies + lcw->lcw_time; - init_timer(&lcw->lcw_timer); - - down(&lcw_refcount_sem); - if (++lcw_refcount == 1) - lcw_dispatch_start(); - up(&lcw_refcount_sem); - - /* Keep this working in case we enable them by default */ - if (lcw->lcw_state == LC_WATCHDOG_ENABLED) { - do_gettimeofday(&lcw->lcw_last_touched); - add_timer(&lcw->lcw_timer); - } - - RETURN(lcw); -} -EXPORT_SYMBOL(lc_watchdog_add); - -static void lcw_update_time(struct lc_watchdog *lcw, const char *message) -{ - struct timeval newtime; - struct timeval timediff; - - do_gettimeofday(&newtime); - if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) { - cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff); - CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n", - lcw->lcw_pid, - message, - timediff.tv_sec, - timediff.tv_usec / 100); - } - lcw->lcw_last_touched = newtime; -} - -void lc_watchdog_touch(struct lc_watchdog *lcw) -{ - ENTRY; - LASSERT(lcw != NULL); - - spin_lock_bh(&lcw_pending_timers_lock); - list_del_init(&lcw->lcw_list); - spin_unlock_bh(&lcw_pending_timers_lock); - - lcw_update_time(lcw, "touched"); - lcw->lcw_state = LC_WATCHDOG_ENABLED; - - mod_timer(&lcw->lcw_timer, jiffies 
+ lcw->lcw_time); - - EXIT; -} -EXPORT_SYMBOL(lc_watchdog_touch); - -void lc_watchdog_disable(struct lc_watchdog *lcw) -{ - ENTRY; - LASSERT(lcw != NULL); - - spin_lock_bh(&lcw_pending_timers_lock); - if (!list_empty(&lcw->lcw_list)) - list_del_init(&lcw->lcw_list); - spin_unlock_bh(&lcw_pending_timers_lock); - - lcw_update_time(lcw, "disabled"); - lcw->lcw_state = LC_WATCHDOG_DISABLED; - - EXIT; -} -EXPORT_SYMBOL(lc_watchdog_disable); - -void lc_watchdog_delete(struct lc_watchdog *lcw) -{ - ENTRY; - LASSERT(lcw != NULL); - - del_timer(&lcw->lcw_timer); - - lcw_update_time(lcw, "deleted"); - - spin_lock_bh(&lcw_pending_timers_lock); - if (!list_empty(&lcw->lcw_list)) - list_del_init(&lcw->lcw_list); - spin_unlock_bh(&lcw_pending_timers_lock); - - down(&lcw_refcount_sem); - if (--lcw_refcount == 0) - lcw_dispatch_stop(); - up(&lcw_refcount_sem); - - LIBCFS_FREE(lcw, sizeof(*lcw)); - - EXIT; -} -EXPORT_SYMBOL(lc_watchdog_delete); - -/* - * Provided watchdog handlers - */ - -void lc_watchdog_dumplog(pid_t pid, void *data) -{ - libcfs_debug_dumplog_internal((void *)((unsigned long)pid)); -} -EXPORT_SYMBOL(lc_watchdog_dumplog); - -#else /* !defined(WITH_WATCHDOG) */ - -struct lc_watchdog *lc_watchdog_add(int timeout_ms, - void (*callback)(pid_t pid, void *), - void *data) -{ - static struct lc_watchdog watchdog; - return &watchdog; -} -EXPORT_SYMBOL(lc_watchdog_add); - -void lc_watchdog_touch(struct lc_watchdog *lcw) -{ -} -EXPORT_SYMBOL(lc_watchdog_touch); - -void lc_watchdog_disable(struct lc_watchdog *lcw) -{ -} -EXPORT_SYMBOL(lc_watchdog_disable); - -void lc_watchdog_delete(struct lc_watchdog *lcw) -{ -} -EXPORT_SYMBOL(lc_watchdog_delete); - -#endif - diff --git a/lnet/libcfs/winnt/winnt-curproc.c b/lnet/libcfs/winnt/winnt-curproc.c deleted file mode 100644 index e21c5c9d51d573050fef5cc9b5d44bffc6004cb1..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-curproc.c +++ /dev/null @@ -1,453 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; 
indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - * - * Impletion of winnt curproc routines. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - - -/* - * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) - * for Linux kernel. 
- */ - -cfs_task_t this_task = - { 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, - "sysetm\0" }; - - -uid_t cfs_curproc_uid(void) -{ - return this_task.uid; -} - -gid_t cfs_curproc_gid(void) -{ - return this_task.gid; -} - -uid_t cfs_curproc_fsuid(void) -{ - return this_task.fsuid; -} - -gid_t cfs_curproc_fsgid(void) -{ - return this_task.fsgid; -} - -pid_t cfs_curproc_pid(void) -{ - return cfs_current()->pid; -} - -int cfs_curproc_groups_nr(void) -{ - return this_task.ngroups; -} - -void cfs_curproc_groups_dump(gid_t *array, int size) -{ - LASSERT(size <= NGROUPS); - size = min_t(int, size, this_task.ngroups); - memcpy(array, this_task.groups, size * sizeof(__u32)); -} - -int cfs_curproc_is_in_groups(gid_t gid) -{ - return in_group_p(gid); -} - -mode_t cfs_curproc_umask(void) -{ - return this_task.umask; -} - -char *cfs_curproc_comm(void) -{ - return this_task.comm; -} - -cfs_kernel_cap_t cfs_curproc_cap_get(void) -{ - return this_task.cap_effective; -} - -void cfs_curproc_cap_set(cfs_kernel_cap_t cap) -{ - this_task.cap_effective = cap; -} - - -/* - * Implementation of linux task management routines - */ - - -/* global of the task manager structure */ - -TASK_MAN TaskMan; - - -/* - * task slot routiens - */ - -PTASK_SLOT -alloc_task_slot() -{ - PTASK_SLOT task = NULL; - - if (TaskMan.slab) { - task = cfs_mem_cache_alloc(TaskMan.slab, 0); - } else { - task = cfs_alloc(sizeof(TASK_SLOT), 0); - } - - return task; -} - -void -init_task_slot(PTASK_SLOT task) -{ - memset(task, 0, sizeof(TASK_SLOT)); - task->Magic = TASKSLT_MAGIC; - task->task = this_task; - task->task.pid = (pid_t)PsGetCurrentThreadId(); - cfs_init_event(&task->Event, TRUE, FALSE); -} - - -void -cleanup_task_slot(PTASK_SLOT task) -{ - if (TaskMan.slab) { - cfs_mem_cache_free(TaskMan.slab, task); - } else { - cfs_free(task); - } -} - -/* - * task manager related routines - */ - -VOID -task_manager_notify( - IN HANDLE ProcessId, - IN HANDLE ThreadId, - IN BOOLEAN Create - ) -{ - PLIST_ENTRY ListEntry 
= NULL; - PTASK_SLOT TaskSlot = NULL; - - spin_lock(&(TaskMan.Lock)); - - ListEntry = TaskMan.TaskList.Flink; - - while (ListEntry != (&(TaskMan.TaskList))) { - - TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - - if (TaskSlot->Pid == ProcessId && TaskSlot->Tid == ThreadId) { - - if (Create) { -/* - DbgPrint("task_manager_notify: Pid=%xh Tid %xh resued (TaskSlot->Tet = %xh)...\n", - ProcessId, ThreadId, TaskSlot->Tet); -*/ - } else { - /* remove the taskslot */ - RemoveEntryList(&(TaskSlot->Link)); - TaskMan.NumOfTasks--; - - /* now free the task slot */ - cleanup_task_slot(TaskSlot); - } - } - - ListEntry = ListEntry->Flink; - } - - spin_unlock(&(TaskMan.Lock)); -} - -int -init_task_manager() -{ - NTSTATUS status; - - /* initialize the content and magic */ - memset(&TaskMan, 0, sizeof(TASK_MAN)); - TaskMan.Magic = TASKMAN_MAGIC; - - /* initialize the spinlock protection */ - spin_lock_init(&TaskMan.Lock); - - /* create slab memory cache */ - TaskMan.slab = cfs_mem_cache_create( - "TSLT", sizeof(TASK_SLOT), 0, 0); - - /* intialize the list header */ - InitializeListHead(&(TaskMan.TaskList)); - - /* set the thread creation/destruction notify routine */ - status = PsSetCreateThreadNotifyRoutine(task_manager_notify); - - if (!NT_SUCCESS(status)) { - cfs_enter_debugger(); - } - - return 0; -} - -void -cleanup_task_manager() -{ - PLIST_ENTRY ListEntry = NULL; - PTASK_SLOT TaskSlot = NULL; - - /* we must stay in system since we succeed to register the - CreateThreadNotifyRoutine: task_manager_notify */ - cfs_enter_debugger(); - - - /* cleanup all the taskslots attached to the list */ - spin_lock(&(TaskMan.Lock)); - - while (!IsListEmpty(&(TaskMan.TaskList))) { - - ListEntry = TaskMan.TaskList.Flink; - TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - - RemoveEntryList(ListEntry); - cleanup_task_slot(TaskSlot); - } - - spin_unlock(&TaskMan.Lock); - - /* destroy the taskslot cache slab */ - cfs_mem_cache_destroy(TaskMan.slab); - memset(&TaskMan, 0, 
sizeof(TASK_MAN)); -} - - -/* - * schedule routines (task slot list) - */ - - -cfs_task_t * -cfs_current() -{ - HANDLE Pid = PsGetCurrentProcessId(); - HANDLE Tid = PsGetCurrentThreadId(); - PETHREAD Tet = PsGetCurrentThread(); - - PLIST_ENTRY ListEntry = NULL; - PTASK_SLOT TaskSlot = NULL; - - spin_lock(&(TaskMan.Lock)); - - ListEntry = TaskMan.TaskList.Flink; - - while (ListEntry != (&(TaskMan.TaskList))) { - - TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - - if (TaskSlot->Pid == Pid && TaskSlot->Tid == Tid) { - if (TaskSlot->Tet != Tet) { - -/* - DbgPrint("cfs_current: Pid=%xh Tid %xh Tet = %xh resued (TaskSlot->Tet = %xh)...\n", - Pid, Tid, Tet, TaskSlot->Tet); -*/ - // - // The old thread was already exit. This must be a - // new thread which get the same Tid to the previous. - // - - TaskSlot->Tet = Tet; - } - break; - - } else { - - if ((ULONG)TaskSlot->Pid > (ULONG)Pid) { - TaskSlot = NULL; - break; - } else if ((ULONG)TaskSlot->Pid == (ULONG)Pid) { - if ((ULONG)TaskSlot->Tid > (ULONG)Tid) { - TaskSlot = NULL; - break; - } - } - - TaskSlot = NULL; - } - - ListEntry = ListEntry->Flink; - } - - if (!TaskSlot) { - - TaskSlot = alloc_task_slot(); - - if (!TaskSlot) { - cfs_enter_debugger(); - goto errorout; - } - - init_task_slot(TaskSlot); - - TaskSlot->Pid = Pid; - TaskSlot->Tid = Tid; - TaskSlot->Tet = Tet; - - if (ListEntry == (&(TaskMan.TaskList))) { - // - // Empty case or the biggest case, put it to the tail. - // - InsertTailList(&(TaskMan.TaskList), &(TaskSlot->Link)); - } else { - // - // Get a slot and smaller than it's tid, put it just before. - // - InsertHeadList(ListEntry->Blink, &(TaskSlot->Link)); - } - - TaskMan.NumOfTasks++; - } - - // - // To Check whether he task structures are arranged in the expected order ? 
- // - - { - PTASK_SLOT Prev = NULL, Curr = NULL; - - ListEntry = TaskMan.TaskList.Flink; - - while (ListEntry != (&(TaskMan.TaskList))) { - - Curr = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - ListEntry = ListEntry->Flink; - - if (Prev) { - if ((ULONG)Prev->Pid > (ULONG)Curr->Pid) { - cfs_enter_debugger(); - } else if ((ULONG)Prev->Pid == (ULONG)Curr->Pid) { - if ((ULONG)Prev->Tid > (ULONG)Curr->Tid) { - cfs_enter_debugger(); - } - } - } - - Prev = Curr; - } - } - -errorout: - - spin_unlock(&(TaskMan.Lock)); - - if (!TaskSlot) { - cfs_enter_debugger(); - return NULL; - } - - return (&(TaskSlot->task)); -} - -int -schedule_timeout(int64_t time) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - cfs_enter_debugger(); - return 0; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - cfs_assert(slot->Magic == TASKSLT_MAGIC); - - if (time == MAX_SCHEDULE_TIMEOUT) { - time = 0; - } - - return (cfs_wait_event(&(slot->Event), time) != 0); -} - -int -schedule() -{ - return schedule_timeout(0); -} - -int -wake_up_process( - cfs_task_t * task - ) -{ - PTASK_SLOT slot = NULL; - - if (!task) { - cfs_enter_debugger(); - return 0; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - cfs_assert(slot->Magic == TASKSLT_MAGIC); - - cfs_wake_event(&(slot->Event)); - - return TRUE; -} - -void -sleep_on( - cfs_waitq_t *waitq - ) -{ - cfs_waitlink_t link; - - cfs_waitlink_init(&link); - cfs_waitq_add(waitq, &link); - cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE); - cfs_waitq_del(waitq, &link); -} - -EXPORT_SYMBOL(cfs_curproc_uid); -EXPORT_SYMBOL(cfs_curproc_pid); -EXPORT_SYMBOL(cfs_curproc_gid); -EXPORT_SYMBOL(cfs_curproc_fsuid); -EXPORT_SYMBOL(cfs_curproc_fsgid); -EXPORT_SYMBOL(cfs_curproc_umask); -EXPORT_SYMBOL(cfs_curproc_comm); -EXPORT_SYMBOL(cfs_curproc_groups_nr); -EXPORT_SYMBOL(cfs_curproc_groups_dump); -EXPORT_SYMBOL(cfs_curproc_is_in_groups); -EXPORT_SYMBOL(cfs_curproc_cap_get); -EXPORT_SYMBOL(cfs_curproc_cap_set); diff --git 
a/lnet/libcfs/winnt/winnt-debug.c b/lnet/libcfs/winnt/winnt-debug.c deleted file mode 100644 index 9e94f845905ccf0dabc9f43bb8b78269f5f011ad..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-debug.c +++ /dev/null @@ -1,1057 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. 
- */ - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/kp30.h> -#include <libcfs/libcfs.h> -#include "tracefile.h" - -void lnet_debug_dumpstack(cfs_task_t *tsk) -{ - return; -} - -cfs_task_t *lnet_current(void) -{ - return cfs_current(); -} - -int lnet_arch_debug_init(unsigned long bufsize) -{ - return 0; -} - -int lnet_arch_debug_cleanup(void) -{ - return 0; -} - -void lnet_run_lbug_upcall(char *file, const char *fn, const int line) -{ -} - -void lbug_with_loc(char *file, const char *func, const int line) -{ - libcfs_catastrophe = 1; - CEMERG("LBUG: pid: %u thread: %#x\n", - (unsigned)cfs_curproc_pid(), (unsigned)PsGetCurrentThread()); - // portals_debug_dumplog(); - // portals_run_lbug_upcall(file, func, line); -} - -#if TDI_LIBCFS_DBG - -/* - * Definitions - */ - -LONG KsDebugLevel = 0x5; - - -/* - * Routines - */ - - -/* - * KsNtStatusToString - * Get the error message for a specified nt status - * - * Arguments: - * Status - nt status code - * - * Return Value: - * PUCHAR - message string for the status code - * - * NOTES: - * N/A - */ - -PUCHAR -KsNtStatusToString (IN NTSTATUS Status) -{ - switch (Status) { - - case 0x00000000: return "STATUS_SUCCESS"; - case 0x00000001: return "STATUS_WAIT_1"; - case 0x00000002: return "STATUS_WAIT_2"; - case 0x00000003: return "STATUS_WAIT_3"; - case 0x0000003F: return "STATUS_WAIT_63"; - case 0x00000080: return "STATUS_ABANDONED_WAIT_0"; - case 0x000000BF: return "STATUS_ABANDONED_WAIT_63"; - case 0x000000C0: return "STATUS_USER_APC"; - case 0x00000100: return "STATUS_KERNEL_APC"; - case 0x00000101: return "STATUS_ALERTED"; - case 0x00000102: return "STATUS_TIMEOUT"; - case 0x00000103: return "STATUS_PENDING"; - case 0x00000104: return "STATUS_REPARSE"; - case 0x00000105: return "STATUS_MORE_ENTRIES"; - case 0x00000106: return "STATUS_NOT_ALL_ASSIGNED"; - case 0x00000107: return "STATUS_SOME_NOT_MAPPED"; - case 0x00000108: return "STATUS_OPLOCK_BREAK_IN_PROGRESS"; - case 0x00000109: return "STATUS_VOLUME_MOUNTED"; - 
case 0x0000010A: return "STATUS_RXACT_COMMITTED"; - case 0x0000010B: return "STATUS_NOTIFY_CLEANUP"; - case 0x0000010C: return "STATUS_NOTIFY_ENUM_DIR"; - case 0x0000010D: return "STATUS_NO_QUOTAS_FOR_ACCOUNT"; - case 0x0000010E: return "STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED"; - case 0x00000110: return "STATUS_PAGE_FAULT_TRANSITION"; - case 0x00000111: return "STATUS_PAGE_FAULT_DEMAND_ZERO"; - case 0x00000112: return "STATUS_PAGE_FAULT_COPY_ON_WRITE"; - case 0x00000113: return "STATUS_PAGE_FAULT_GUARD_PAGE"; - case 0x00000114: return "STATUS_PAGE_FAULT_PAGING_FILE"; - case 0x00000115: return "STATUS_CACHE_PAGE_LOCKED"; - case 0x00000116: return "STATUS_CRASH_DUMP"; - case 0x00000117: return "STATUS_BUFFER_ALL_ZEROS"; - case 0x00000118: return "STATUS_REPARSE_OBJECT"; - case 0x00000119: return "STATUS_RESOURCE_REQUIREMENTS_CHANGED"; - case 0x00000120: return "STATUS_TRANSLATION_COMPLETE"; - case 0x00000121: return "STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY"; - case 0x00010001: return "DBG_EXCEPTION_HANDLED"; - case 0x00010002: return "DBG_CONTINUE"; - case 0x40000000: return "STATUS_OBJECT_NAME_EXISTS"; - case 0x40000001: return "STATUS_THREAD_WAS_SUSPENDED"; - case 0x40000002: return "STATUS_WORKING_SET_LIMIT_RANGE"; - case 0x40000003: return "STATUS_IMAGE_NOT_AT_BASE"; - case 0x40000004: return "STATUS_RXACT_STATE_CREATED"; - case 0x40000005: return "STATUS_SEGMENT_NOTIFICATION"; - case 0x40000006: return "STATUS_LOCAL_USER_SESSION_KEY"; - case 0x40000007: return "STATUS_BAD_CURRENT_DIRECTORY"; - case 0x40000008: return "STATUS_SERIAL_MORE_WRITES"; - case 0x40000009: return "STATUS_REGISTRY_RECOVERED"; - case 0x4000000A: return "STATUS_FT_READ_RECOVERY_FROM_BACKUP"; - case 0x4000000B: return "STATUS_FT_WRITE_RECOVERY"; - case 0x4000000C: return "STATUS_SERIAL_COUNTER_TIMEOUT"; - case 0x4000000D: return "STATUS_NULL_LM_PASSWORD"; - case 0x4000000E: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH"; - case 0x4000000F: return "STATUS_RECEIVE_PARTIAL"; - case 0x40000010: 
return "STATUS_RECEIVE_EXPEDITED"; - case 0x40000011: return "STATUS_RECEIVE_PARTIAL_EXPEDITED"; - case 0x40000012: return "STATUS_EVENT_DONE"; - case 0x40000013: return "STATUS_EVENT_PENDING"; - case 0x40000014: return "STATUS_CHECKING_FILE_SYSTEM"; - case 0x40000015: return "STATUS_FATAL_APP_EXIT"; - case 0x40000016: return "STATUS_PREDEFINED_HANDLE"; - case 0x40000017: return "STATUS_WAS_UNLOCKED"; - case 0x40000018: return "STATUS_SERVICE_NOTIFICATION"; - case 0x40000019: return "STATUS_WAS_LOCKED"; - case 0x4000001A: return "STATUS_LOG_HARD_ERROR"; - case 0x4000001B: return "STATUS_ALREADY_WIN32"; - case 0x4000001C: return "STATUS_WX86_UNSIMULATE"; - case 0x4000001D: return "STATUS_WX86_CONTINUE"; - case 0x4000001E: return "STATUS_WX86_SINGLE_STEP"; - case 0x4000001F: return "STATUS_WX86_BREAKPOINT"; - case 0x40000020: return "STATUS_WX86_EXCEPTION_CONTINUE"; - case 0x40000021: return "STATUS_WX86_EXCEPTION_LASTCHANCE"; - case 0x40000022: return "STATUS_WX86_EXCEPTION_CHAIN"; - case 0x40000023: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE"; - case 0x40000024: return "STATUS_NO_YIELD_PERFORMED"; - case 0x40000025: return "STATUS_TIMER_RESUME_IGNORED"; - case 0x40000026: return "STATUS_ARBITRATION_UNHANDLED"; - case 0x40000027: return "STATUS_CARDBUS_NOT_SUPPORTED"; - case 0x40000028: return "STATUS_WX86_CREATEWX86TIB"; - case 0x40000029: return "STATUS_MP_PROCESSOR_MISMATCH"; - case 0x40010001: return "DBG_REPLY_LATER"; - case 0x40010002: return "DBG_UNABLE_TO_PROVIDE_HANDLE"; - case 0x40010003: return "DBG_TERMINATE_THREAD"; - case 0x40010004: return "DBG_TERMINATE_PROCESS"; - case 0x40010005: return "DBG_CONTROL_C"; - case 0x40010006: return "DBG_PRINTEXCEPTION_C"; - case 0x40010007: return "DBG_RIPEXCEPTION"; - case 0x40010008: return "DBG_CONTROL_BREAK"; - case 0x80000001: return "STATUS_GUARD_PAGE_VIOLATION"; - case 0x80000002: return "STATUS_DATATYPE_MISALIGNMENT"; - case 0x80000003: return "STATUS_BREAKPOINT"; - case 0x80000004: return 
"STATUS_SINGLE_STEP"; - case 0x80000005: return "STATUS_BUFFER_OVERFLOW"; - case 0x80000006: return "STATUS_NO_MORE_FILES"; - case 0x80000007: return "STATUS_WAKE_SYSTEM_DEBUGGER"; - case 0x8000000A: return "STATUS_HANDLES_CLOSED"; - case 0x8000000B: return "STATUS_NO_INHERITANCE"; - case 0x8000000C: return "STATUS_GUID_SUBSTITUTION_MADE"; - case 0x8000000D: return "STATUS_PARTIAL_COPY"; - case 0x8000000E: return "STATUS_DEVICE_PAPER_EMPTY"; - case 0x8000000F: return "STATUS_DEVICE_POWERED_OFF"; - case 0x80000010: return "STATUS_DEVICE_OFF_LINE"; - case 0x80000011: return "STATUS_DEVICE_BUSY"; - case 0x80000012: return "STATUS_NO_MORE_EAS"; - case 0x80000013: return "STATUS_INVALID_EA_NAME"; - case 0x80000014: return "STATUS_EA_LIST_INCONSISTENT"; - case 0x80000015: return "STATUS_INVALID_EA_FLAG"; - case 0x80000016: return "STATUS_VERIFY_REQUIRED"; - case 0x80000017: return "STATUS_EXTRANEOUS_INFORMATION"; - case 0x80000018: return "STATUS_RXACT_COMMIT_NECESSARY"; - case 0x8000001A: return "STATUS_NO_MORE_ENTRIES"; - case 0x8000001B: return "STATUS_FILEMARK_DETECTED"; - case 0x8000001C: return "STATUS_MEDIA_CHANGED"; - case 0x8000001D: return "STATUS_BUS_RESET"; - case 0x8000001E: return "STATUS_END_OF_MEDIA"; - case 0x8000001F: return "STATUS_BEGINNING_OF_MEDIA"; - case 0x80000020: return "STATUS_MEDIA_CHECK"; - case 0x80000021: return "STATUS_SETMARK_DETECTED"; - case 0x80000022: return "STATUS_NO_DATA_DETECTED"; - case 0x80000023: return "STATUS_REDIRECTOR_HAS_OPEN_HANDLES"; - case 0x80000024: return "STATUS_SERVER_HAS_OPEN_HANDLES"; - case 0x80000025: return "STATUS_ALREADY_DISCONNECTED"; - case 0x80000026: return "STATUS_LONGJUMP"; - case 0x80010001: return "DBG_EXCEPTION_NOT_HANDLED"; - case 0xC0000001: return "STATUS_UNSUCCESSFUL"; - case 0xC0000002: return "STATUS_NOT_IMPLEMENTED"; - case 0xC0000003: return "STATUS_INVALID_INFO_CLASS"; - case 0xC0000004: return "STATUS_INFO_LENGTH_MISMATCH"; - case 0xC0000005: return "STATUS_ACCESS_VIOLATION"; - case 
0xC0000006: return "STATUS_IN_PAGE_ERROR"; - case 0xC0000007: return "STATUS_PAGEFILE_QUOTA"; - case 0xC0000008: return "STATUS_INVALID_HANDLE"; - case 0xC0000009: return "STATUS_BAD_INITIAL_STACK"; - case 0xC000000A: return "STATUS_BAD_INITIAL_PC"; - case 0xC000000B: return "STATUS_INVALID_CID"; - case 0xC000000C: return "STATUS_TIMER_NOT_CANCELED"; - case 0xC000000D: return "STATUS_INVALID_PARAMETER"; - case 0xC000000E: return "STATUS_NO_SUCH_DEVICE"; - case 0xC000000F: return "STATUS_NO_SUCH_FILE"; - case 0xC0000010: return "STATUS_INVALID_DEVICE_REQUEST"; - case 0xC0000011: return "STATUS_END_OF_FILE"; - case 0xC0000012: return "STATUS_WRONG_VOLUME"; - case 0xC0000013: return "STATUS_NO_MEDIA_IN_DEVICE"; - case 0xC0000014: return "STATUS_UNRECOGNIZED_MEDIA"; - case 0xC0000015: return "STATUS_NONEXISTENT_SECTOR"; - case 0xC0000016: return "STATUS_MORE_PROCESSING_REQUIRED"; - case 0xC0000017: return "STATUS_NO_MEMORY"; - case 0xC0000018: return "STATUS_CONFLICTING_ADDRESSES"; - case 0xC0000019: return "STATUS_NOT_MAPPED_VIEW"; - case 0xC000001A: return "STATUS_UNABLE_TO_FREE_VM"; - case 0xC000001B: return "STATUS_UNABLE_TO_DELETE_SECTION"; - case 0xC000001C: return "STATUS_INVALID_SYSTEM_SERVICE"; - case 0xC000001D: return "STATUS_ILLEGAL_INSTRUCTION"; - case 0xC000001E: return "STATUS_INVALID_LOCK_SEQUENCE"; - case 0xC000001F: return "STATUS_INVALID_VIEW_SIZE"; - case 0xC0000020: return "STATUS_INVALID_FILE_FOR_SECTION"; - case 0xC0000021: return "STATUS_ALREADY_COMMITTED"; - case 0xC0000022: return "STATUS_ACCESS_DENIED"; - case 0xC0000023: return "STATUS_BUFFER_TOO_SMALL"; - case 0xC0000024: return "STATUS_OBJECT_TYPE_MISMATCH"; - case 0xC0000025: return "STATUS_NONCONTINUABLE_EXCEPTION"; - case 0xC0000026: return "STATUS_INVALID_DISPOSITION"; - case 0xC0000027: return "STATUS_UNWIND"; - case 0xC0000028: return "STATUS_BAD_STACK"; - case 0xC0000029: return "STATUS_INVALID_UNWIND_TARGET"; - case 0xC000002A: return "STATUS_NOT_LOCKED"; - case 0xC000002B: return 
"STATUS_PARITY_ERROR"; - case 0xC000002C: return "STATUS_UNABLE_TO_DECOMMIT_VM"; - case 0xC000002D: return "STATUS_NOT_COMMITTED"; - case 0xC000002E: return "STATUS_INVALID_PORT_ATTRIBUTES"; - case 0xC000002F: return "STATUS_PORT_MESSAGE_TOO_LONG"; - case 0xC0000030: return "STATUS_INVALID_PARAMETER_MIX"; - case 0xC0000031: return "STATUS_INVALID_QUOTA_LOWER"; - case 0xC0000032: return "STATUS_DISK_CORRUPT_ERROR"; - case 0xC0000033: return "STATUS_OBJECT_NAME_INVALID"; - case 0xC0000034: return "STATUS_OBJECT_NAME_NOT_FOUND"; - case 0xC0000035: return "STATUS_OBJECT_NAME_COLLISION"; - case 0xC0000037: return "STATUS_PORT_DISCONNECTED"; - case 0xC0000038: return "STATUS_DEVICE_ALREADY_ATTACHED"; - case 0xC0000039: return "STATUS_OBJECT_PATH_INVALID"; - case 0xC000003A: return "STATUS_OBJECT_PATH_NOT_FOUND"; - case 0xC000003B: return "STATUS_OBJECT_PATH_SYNTAX_BAD"; - case 0xC000003C: return "STATUS_DATA_OVERRUN"; - case 0xC000003D: return "STATUS_DATA_LATE_ERROR"; - case 0xC000003E: return "STATUS_DATA_ERROR"; - case 0xC000003F: return "STATUS_CRC_ERROR"; - case 0xC0000040: return "STATUS_SECTION_TOO_BIG"; - case 0xC0000041: return "STATUS_PORT_CONNECTION_REFUSED"; - case 0xC0000042: return "STATUS_INVALID_PORT_HANDLE"; - case 0xC0000043: return "STATUS_SHARING_VIOLATION"; - case 0xC0000044: return "STATUS_QUOTA_EXCEEDED"; - case 0xC0000045: return "STATUS_INVALID_PAGE_PROTECTION"; - case 0xC0000046: return "STATUS_MUTANT_NOT_OWNED"; - case 0xC0000047: return "STATUS_SEMAPHORE_LIMIT_EXCEEDED"; - case 0xC0000048: return "STATUS_PORT_ALREADY_SET"; - case 0xC0000049: return "STATUS_SECTION_NOT_IMAGE"; - case 0xC000004A: return "STATUS_SUSPEND_COUNT_EXCEEDED"; - case 0xC000004B: return "STATUS_THREAD_IS_TERMINATING"; - case 0xC000004C: return "STATUS_BAD_WORKING_SET_LIMIT"; - case 0xC000004D: return "STATUS_INCOMPATIBLE_FILE_MAP"; - case 0xC000004E: return "STATUS_SECTION_PROTECTION"; - case 0xC000004F: return "STATUS_EAS_NOT_SUPPORTED"; - case 0xC0000050: return 
"STATUS_EA_TOO_LARGE"; - case 0xC0000051: return "STATUS_NONEXISTENT_EA_ENTRY"; - case 0xC0000052: return "STATUS_NO_EAS_ON_FILE"; - case 0xC0000053: return "STATUS_EA_CORRUPT_ERROR"; - case 0xC0000054: return "STATUS_FILE_LOCK_CONFLICT"; - case 0xC0000055: return "STATUS_LOCK_NOT_GRANTED"; - case 0xC0000056: return "STATUS_DELETE_PENDING"; - case 0xC0000057: return "STATUS_CTL_FILE_NOT_SUPPORTED"; - case 0xC0000058: return "STATUS_UNKNOWN_REVISION"; - case 0xC0000059: return "STATUS_REVISION_MISMATCH"; - case 0xC000005A: return "STATUS_INVALID_OWNER"; - case 0xC000005B: return "STATUS_INVALID_PRIMARY_GROUP"; - case 0xC000005C: return "STATUS_NO_IMPERSONATION_TOKEN"; - case 0xC000005D: return "STATUS_CANT_DISABLE_MANDATORY"; - case 0xC000005E: return "STATUS_NO_LOGON_SERVERS"; - case 0xC000005F: return "STATUS_NO_SUCH_LOGON_SESSION"; - case 0xC0000060: return "STATUS_NO_SUCH_PRIVILEGE"; - case 0xC0000061: return "STATUS_PRIVILEGE_NOT_HELD"; - case 0xC0000062: return "STATUS_INVALID_ACCOUNT_NAME"; - case 0xC0000063: return "STATUS_USER_EXISTS"; - case 0xC0000064: return "STATUS_NO_SUCH_USER"; - case 0xC0000065: return "STATUS_GROUP_EXISTS"; - case 0xC0000066: return "STATUS_NO_SUCH_GROUP"; - case 0xC0000067: return "STATUS_MEMBER_IN_GROUP"; - case 0xC0000068: return "STATUS_MEMBER_NOT_IN_GROUP"; - case 0xC0000069: return "STATUS_LAST_ADMIN"; - case 0xC000006A: return "STATUS_WRONG_PASSWORD"; - case 0xC000006B: return "STATUS_ILL_FORMED_PASSWORD"; - case 0xC000006C: return "STATUS_PASSWORD_RESTRICTION"; - case 0xC000006D: return "STATUS_LOGON_FAILURE"; - case 0xC000006E: return "STATUS_ACCOUNT_RESTRICTION"; - case 0xC000006F: return "STATUS_INVALID_LOGON_HOURS"; - case 0xC0000070: return "STATUS_INVALID_WORKSTATION"; - case 0xC0000071: return "STATUS_PASSWORD_EXPIRED"; - case 0xC0000072: return "STATUS_ACCOUNT_DISABLED"; - case 0xC0000073: return "STATUS_NONE_MAPPED"; - case 0xC0000074: return "STATUS_TOO_MANY_LUIDS_REQUESTED"; - case 0xC0000075: return 
"STATUS_LUIDS_EXHAUSTED"; - case 0xC0000076: return "STATUS_INVALID_SUB_AUTHORITY"; - case 0xC0000077: return "STATUS_INVALID_ACL"; - case 0xC0000078: return "STATUS_INVALID_SID"; - case 0xC0000079: return "STATUS_INVALID_SECURITY_DESCR"; - case 0xC000007A: return "STATUS_PROCEDURE_NOT_FOUND"; - case 0xC000007B: return "STATUS_INVALID_IMAGE_FORMAT"; - case 0xC000007C: return "STATUS_NO_TOKEN"; - case 0xC000007D: return "STATUS_BAD_INHERITANCE_ACL"; - case 0xC000007E: return "STATUS_RANGE_NOT_LOCKED"; - case 0xC000007F: return "STATUS_DISK_FULL"; - case 0xC0000080: return "STATUS_SERVER_DISABLED"; - case 0xC0000081: return "STATUS_SERVER_NOT_DISABLED"; - case 0xC0000082: return "STATUS_TOO_MANY_GUIDS_REQUESTED"; - case 0xC0000083: return "STATUS_GUIDS_EXHAUSTED"; - case 0xC0000084: return "STATUS_INVALID_ID_AUTHORITY"; - case 0xC0000085: return "STATUS_AGENTS_EXHAUSTED"; - case 0xC0000086: return "STATUS_INVALID_VOLUME_LABEL"; - case 0xC0000087: return "STATUS_SECTION_NOT_EXTENDED"; - case 0xC0000088: return "STATUS_NOT_MAPPED_DATA"; - case 0xC0000089: return "STATUS_RESOURCE_DATA_NOT_FOUND"; - case 0xC000008A: return "STATUS_RESOURCE_TYPE_NOT_FOUND"; - case 0xC000008B: return "STATUS_RESOURCE_NAME_NOT_FOUND"; - case 0xC000008C: return "STATUS_ARRAY_BOUNDS_EXCEEDED"; - case 0xC000008D: return "STATUS_FLOAT_DENORMAL_OPERAND"; - case 0xC000008E: return "STATUS_FLOAT_DIVIDE_BY_ZERO"; - case 0xC000008F: return "STATUS_FLOAT_INEXACT_RESULT"; - case 0xC0000090: return "STATUS_FLOAT_INVALID_OPERATION"; - case 0xC0000091: return "STATUS_FLOAT_OVERFLOW"; - case 0xC0000092: return "STATUS_FLOAT_STACK_CHECK"; - case 0xC0000093: return "STATUS_FLOAT_UNDERFLOW"; - case 0xC0000094: return "STATUS_INTEGER_DIVIDE_BY_ZERO"; - case 0xC0000095: return "STATUS_INTEGER_OVERFLOW"; - case 0xC0000096: return "STATUS_PRIVILEGED_INSTRUCTION"; - case 0xC0000097: return "STATUS_TOO_MANY_PAGING_FILES"; - case 0xC0000098: return "STATUS_FILE_INVALID"; - case 0xC0000099: return 
"STATUS_ALLOTTED_SPACE_EXCEEDED"; - case 0xC000009A: return "STATUS_INSUFFICIENT_RESOURCES"; - case 0xC000009B: return "STATUS_DFS_EXIT_PATH_FOUND"; - case 0xC000009C: return "STATUS_DEVICE_DATA_ERROR"; - case 0xC000009D: return "STATUS_DEVICE_NOT_CONNECTED"; - case 0xC000009E: return "STATUS_DEVICE_POWER_FAILURE"; - case 0xC000009F: return "STATUS_FREE_VM_NOT_AT_BASE"; - case 0xC00000A0: return "STATUS_MEMORY_NOT_ALLOCATED"; - case 0xC00000A1: return "STATUS_WORKING_SET_QUOTA"; - case 0xC00000A2: return "STATUS_MEDIA_WRITE_PROTECTED"; - case 0xC00000A3: return "STATUS_DEVICE_NOT_READY"; - case 0xC00000A4: return "STATUS_INVALID_GROUP_ATTRIBUTES"; - case 0xC00000A5: return "STATUS_BAD_IMPERSONATION_LEVEL"; - case 0xC00000A6: return "STATUS_CANT_OPEN_ANONYMOUS"; - case 0xC00000A7: return "STATUS_BAD_VALIDATION_CLASS"; - case 0xC00000A8: return "STATUS_BAD_TOKEN_TYPE"; - case 0xC00000A9: return "STATUS_BAD_MASTER_BOOT_RECORD"; - case 0xC00000AA: return "STATUS_INSTRUCTION_MISALIGNMENT"; - case 0xC00000AB: return "STATUS_INSTANCE_NOT_AVAILABLE"; - case 0xC00000AC: return "STATUS_PIPE_NOT_AVAILABLE"; - case 0xC00000AD: return "STATUS_INVALID_PIPE_STATE"; - case 0xC00000AE: return "STATUS_PIPE_BUSY"; - case 0xC00000AF: return "STATUS_ILLEGAL_FUNCTION"; - case 0xC00000B0: return "STATUS_PIPE_DISCONNECTED"; - case 0xC00000B1: return "STATUS_PIPE_CLOSING"; - case 0xC00000B2: return "STATUS_PIPE_CONNECTED"; - case 0xC00000B3: return "STATUS_PIPE_LISTENING"; - case 0xC00000B4: return "STATUS_INVALID_READ_MODE"; - case 0xC00000B5: return "STATUS_IO_TIMEOUT"; - case 0xC00000B6: return "STATUS_FILE_FORCED_CLOSED"; - case 0xC00000B7: return "STATUS_PROFILING_NOT_STARTED"; - case 0xC00000B8: return "STATUS_PROFILING_NOT_STOPPED"; - case 0xC00000B9: return "STATUS_COULD_NOT_INTERPRET"; - case 0xC00000BA: return "STATUS_FILE_IS_A_DIRECTORY"; - case 0xC00000BB: return "STATUS_NOT_SUPPORTED"; - case 0xC00000BC: return "STATUS_REMOTE_NOT_LISTENING"; - case 0xC00000BD: return 
"STATUS_DUPLICATE_NAME"; - case 0xC00000BE: return "STATUS_BAD_NETWORK_PATH"; - case 0xC00000BF: return "STATUS_NETWORK_BUSY"; - case 0xC00000C0: return "STATUS_DEVICE_DOES_NOT_EXIST"; - case 0xC00000C1: return "STATUS_TOO_MANY_COMMANDS"; - case 0xC00000C2: return "STATUS_ADAPTER_HARDWARE_ERROR"; - case 0xC00000C3: return "STATUS_INVALID_NETWORK_RESPONSE"; - case 0xC00000C4: return "STATUS_UNEXPECTED_NETWORK_ERROR"; - case 0xC00000C5: return "STATUS_BAD_REMOTE_ADAPTER"; - case 0xC00000C6: return "STATUS_PRINT_QUEUE_FULL"; - case 0xC00000C7: return "STATUS_NO_SPOOL_SPACE"; - case 0xC00000C8: return "STATUS_PRINT_CANCELLED"; - case 0xC00000C9: return "STATUS_NETWORK_NAME_DELETED"; - case 0xC00000CA: return "STATUS_NETWORK_ACCESS_DENIED"; - case 0xC00000CB: return "STATUS_BAD_DEVICE_TYPE"; - case 0xC00000CC: return "STATUS_BAD_NETWORK_NAME"; - case 0xC00000CD: return "STATUS_TOO_MANY_NAMES"; - case 0xC00000CE: return "STATUS_TOO_MANY_SESSIONS"; - case 0xC00000CF: return "STATUS_SHARING_PAUSED"; - case 0xC00000D0: return "STATUS_REQUEST_NOT_ACCEPTED"; - case 0xC00000D1: return "STATUS_REDIRECTOR_PAUSED"; - case 0xC00000D2: return "STATUS_NET_WRITE_FAULT"; - case 0xC00000D3: return "STATUS_PROFILING_AT_LIMIT"; - case 0xC00000D4: return "STATUS_NOT_SAME_DEVICE"; - case 0xC00000D5: return "STATUS_FILE_RENAMED"; - case 0xC00000D6: return "STATUS_VIRTUAL_CIRCUIT_CLOSED"; - case 0xC00000D7: return "STATUS_NO_SECURITY_ON_OBJECT"; - case 0xC00000D8: return "STATUS_CANT_WAIT"; - case 0xC00000D9: return "STATUS_PIPE_EMPTY"; - case 0xC00000DA: return "STATUS_CANT_ACCESS_DOMAIN_INFO"; - case 0xC00000DB: return "STATUS_CANT_TERMINATE_SELF"; - case 0xC00000DC: return "STATUS_INVALID_SERVER_STATE"; - case 0xC00000DD: return "STATUS_INVALID_DOMAIN_STATE"; - case 0xC00000DE: return "STATUS_INVALID_DOMAIN_ROLE"; - case 0xC00000DF: return "STATUS_NO_SUCH_DOMAIN"; - case 0xC00000E0: return "STATUS_DOMAIN_EXISTS"; - case 0xC00000E1: return "STATUS_DOMAIN_LIMIT_EXCEEDED"; - case 0xC00000E2: 
return "STATUS_OPLOCK_NOT_GRANTED"; - case 0xC00000E3: return "STATUS_INVALID_OPLOCK_PROTOCOL"; - case 0xC00000E4: return "STATUS_INTERNAL_DB_CORRUPTION"; - case 0xC00000E5: return "STATUS_INTERNAL_ERROR"; - case 0xC00000E6: return "STATUS_GENERIC_NOT_MAPPED"; - case 0xC00000E7: return "STATUS_BAD_DESCRIPTOR_FORMAT"; - case 0xC00000E8: return "STATUS_INVALID_USER_BUFFER"; - case 0xC00000E9: return "STATUS_UNEXPECTED_IO_ERROR"; - case 0xC00000EA: return "STATUS_UNEXPECTED_MM_CREATE_ERR"; - case 0xC00000EB: return "STATUS_UNEXPECTED_MM_MAP_ERROR"; - case 0xC00000EC: return "STATUS_UNEXPECTED_MM_EXTEND_ERR"; - case 0xC00000ED: return "STATUS_NOT_LOGON_PROCESS"; - case 0xC00000EE: return "STATUS_LOGON_SESSION_EXISTS"; - case 0xC00000EF: return "STATUS_INVALID_PARAMETER_1"; - case 0xC00000F0: return "STATUS_INVALID_PARAMETER_2"; - case 0xC00000F1: return "STATUS_INVALID_PARAMETER_3"; - case 0xC00000F2: return "STATUS_INVALID_PARAMETER_4"; - case 0xC00000F3: return "STATUS_INVALID_PARAMETER_5"; - case 0xC00000F4: return "STATUS_INVALID_PARAMETER_6"; - case 0xC00000F5: return "STATUS_INVALID_PARAMETER_7"; - case 0xC00000F6: return "STATUS_INVALID_PARAMETER_8"; - case 0xC00000F7: return "STATUS_INVALID_PARAMETER_9"; - case 0xC00000F8: return "STATUS_INVALID_PARAMETER_10"; - case 0xC00000F9: return "STATUS_INVALID_PARAMETER_11"; - case 0xC00000FA: return "STATUS_INVALID_PARAMETER_12"; - case 0xC00000FB: return "STATUS_REDIRECTOR_NOT_STARTED"; - case 0xC00000FC: return "STATUS_REDIRECTOR_STARTED"; - case 0xC00000FD: return "STATUS_STACK_OVERFLOW"; - case 0xC00000FE: return "STATUS_NO_SUCH_PACKAGE"; - case 0xC00000FF: return "STATUS_BAD_FUNCTION_TABLE"; - case 0xC0000100: return "STATUS_VARIABLE_NOT_FOUND"; - case 0xC0000101: return "STATUS_DIRECTORY_NOT_EMPTY"; - case 0xC0000102: return "STATUS_FILE_CORRUPT_ERROR"; - case 0xC0000103: return "STATUS_NOT_A_DIRECTORY"; - case 0xC0000104: return "STATUS_BAD_LOGON_SESSION_STATE"; - case 0xC0000105: return 
"STATUS_LOGON_SESSION_COLLISION"; - case 0xC0000106: return "STATUS_NAME_TOO_LONG"; - case 0xC0000107: return "STATUS_FILES_OPEN"; - case 0xC0000108: return "STATUS_CONNECTION_IN_USE"; - case 0xC0000109: return "STATUS_MESSAGE_NOT_FOUND"; - case 0xC000010A: return "STATUS_PROCESS_IS_TERMINATING"; - case 0xC000010B: return "STATUS_INVALID_LOGON_TYPE"; - case 0xC000010C: return "STATUS_NO_GUID_TRANSLATION"; - case 0xC000010D: return "STATUS_CANNOT_IMPERSONATE"; - case 0xC000010E: return "STATUS_IMAGE_ALREADY_LOADED"; - case 0xC000010F: return "STATUS_ABIOS_NOT_PRESENT"; - case 0xC0000110: return "STATUS_ABIOS_LID_NOT_EXIST"; - case 0xC0000111: return "STATUS_ABIOS_LID_ALREADY_OWNED"; - case 0xC0000112: return "STATUS_ABIOS_NOT_LID_OWNER"; - case 0xC0000113: return "STATUS_ABIOS_INVALID_COMMAND"; - case 0xC0000114: return "STATUS_ABIOS_INVALID_LID"; - case 0xC0000115: return "STATUS_ABIOS_SELECTOR_NOT_AVAILABLE"; - case 0xC0000116: return "STATUS_ABIOS_INVALID_SELECTOR"; - case 0xC0000117: return "STATUS_NO_LDT"; - case 0xC0000118: return "STATUS_INVALID_LDT_SIZE"; - case 0xC0000119: return "STATUS_INVALID_LDT_OFFSET"; - case 0xC000011A: return "STATUS_INVALID_LDT_DESCRIPTOR"; - case 0xC000011B: return "STATUS_INVALID_IMAGE_NE_FORMAT"; - case 0xC000011C: return "STATUS_RXACT_INVALID_STATE"; - case 0xC000011D: return "STATUS_RXACT_COMMIT_FAILURE"; - case 0xC000011E: return "STATUS_MAPPED_FILE_SIZE_ZERO"; - case 0xC000011F: return "STATUS_TOO_MANY_OPENED_FILES"; - case 0xC0000120: return "STATUS_CANCELLED"; - case 0xC0000121: return "STATUS_CANNOT_DELETE"; - case 0xC0000122: return "STATUS_INVALID_COMPUTER_NAME"; - case 0xC0000123: return "STATUS_FILE_DELETED"; - case 0xC0000124: return "STATUS_SPECIAL_ACCOUNT"; - case 0xC0000125: return "STATUS_SPECIAL_GROUP"; - case 0xC0000126: return "STATUS_SPECIAL_USER"; - case 0xC0000127: return "STATUS_MEMBERS_PRIMARY_GROUP"; - case 0xC0000128: return "STATUS_FILE_CLOSED"; - case 0xC0000129: return "STATUS_TOO_MANY_THREADS"; - 
case 0xC000012A: return "STATUS_THREAD_NOT_IN_PROCESS"; - case 0xC000012B: return "STATUS_TOKEN_ALREADY_IN_USE"; - case 0xC000012C: return "STATUS_PAGEFILE_QUOTA_EXCEEDED"; - case 0xC000012D: return "STATUS_COMMITMENT_LIMIT"; - case 0xC000012E: return "STATUS_INVALID_IMAGE_LE_FORMAT"; - case 0xC000012F: return "STATUS_INVALID_IMAGE_NOT_MZ"; - case 0xC0000130: return "STATUS_INVALID_IMAGE_PROTECT"; - case 0xC0000131: return "STATUS_INVALID_IMAGE_WIN_16"; - case 0xC0000132: return "STATUS_LOGON_SERVER_CONFLICT"; - case 0xC0000133: return "STATUS_TIME_DIFFERENCE_AT_DC"; - case 0xC0000134: return "STATUS_SYNCHRONIZATION_REQUIRED"; - case 0xC0000135: return "STATUS_DLL_NOT_FOUND"; - case 0xC0000136: return "STATUS_OPEN_FAILED"; - case 0xC0000137: return "STATUS_IO_PRIVILEGE_FAILED"; - case 0xC0000138: return "STATUS_ORDINAL_NOT_FOUND"; - case 0xC0000139: return "STATUS_ENTRYPOINT_NOT_FOUND"; - case 0xC000013A: return "STATUS_CONTROL_C_EXIT"; - case 0xC000013B: return "STATUS_LOCAL_DISCONNECT"; - case 0xC000013C: return "STATUS_REMOTE_DISCONNECT"; - case 0xC000013D: return "STATUS_REMOTE_RESOURCES"; - case 0xC000013E: return "STATUS_LINK_FAILED"; - case 0xC000013F: return "STATUS_LINK_TIMEOUT"; - case 0xC0000140: return "STATUS_INVALID_CONNECTION"; - case 0xC0000141: return "STATUS_INVALID_ADDRESS"; - case 0xC0000142: return "STATUS_DLL_INIT_FAILED"; - case 0xC0000143: return "STATUS_MISSING_SYSTEMFILE"; - case 0xC0000144: return "STATUS_UNHANDLED_EXCEPTION"; - case 0xC0000145: return "STATUS_APP_INIT_FAILURE"; - case 0xC0000146: return "STATUS_PAGEFILE_CREATE_FAILED"; - case 0xC0000147: return "STATUS_NO_PAGEFILE"; - case 0xC0000148: return "STATUS_INVALID_LEVEL"; - case 0xC0000149: return "STATUS_WRONG_PASSWORD_CORE"; - case 0xC000014A: return "STATUS_ILLEGAL_FLOAT_CONTEXT"; - case 0xC000014B: return "STATUS_PIPE_BROKEN"; - case 0xC000014C: return "STATUS_REGISTRY_CORRUPT"; - case 0xC000014D: return "STATUS_REGISTRY_IO_FAILED"; - case 0xC000014E: return 
"STATUS_NO_EVENT_PAIR"; - case 0xC000014F: return "STATUS_UNRECOGNIZED_VOLUME"; - case 0xC0000150: return "STATUS_SERIAL_NO_DEVICE_INITED"; - case 0xC0000151: return "STATUS_NO_SUCH_ALIAS"; - case 0xC0000152: return "STATUS_MEMBER_NOT_IN_ALIAS"; - case 0xC0000153: return "STATUS_MEMBER_IN_ALIAS"; - case 0xC0000154: return "STATUS_ALIAS_EXISTS"; - case 0xC0000155: return "STATUS_LOGON_NOT_GRANTED"; - case 0xC0000156: return "STATUS_TOO_MANY_SECRETS"; - case 0xC0000157: return "STATUS_SECRET_TOO_LONG"; - case 0xC0000158: return "STATUS_INTERNAL_DB_ERROR"; - case 0xC0000159: return "STATUS_FULLSCREEN_MODE"; - case 0xC000015A: return "STATUS_TOO_MANY_CONTEXT_IDS"; - case 0xC000015B: return "STATUS_LOGON_TYPE_NOT_GRANTED"; - case 0xC000015C: return "STATUS_NOT_REGISTRY_FILE"; - case 0xC000015D: return "STATUS_NT_CROSS_ENCRYPTION_REQUIRED"; - case 0xC000015E: return "STATUS_DOMAIN_CTRLR_CONFIG_ERROR"; - case 0xC000015F: return "STATUS_FT_MISSING_MEMBER"; - case 0xC0000160: return "STATUS_ILL_FORMED_SERVICE_ENTRY"; - case 0xC0000161: return "STATUS_ILLEGAL_CHARACTER"; - case 0xC0000162: return "STATUS_UNMAPPABLE_CHARACTER"; - case 0xC0000163: return "STATUS_UNDEFINED_CHARACTER"; - case 0xC0000164: return "STATUS_FLOPPY_VOLUME"; - case 0xC0000165: return "STATUS_FLOPPY_ID_MARK_NOT_FOUND"; - case 0xC0000166: return "STATUS_FLOPPY_WRONG_CYLINDER"; - case 0xC0000167: return "STATUS_FLOPPY_UNKNOWN_ERROR"; - case 0xC0000168: return "STATUS_FLOPPY_BAD_REGISTERS"; - case 0xC0000169: return "STATUS_DISK_RECALIBRATE_FAILED"; - case 0xC000016A: return "STATUS_DISK_OPERATION_FAILED"; - case 0xC000016B: return "STATUS_DISK_RESET_FAILED"; - case 0xC000016C: return "STATUS_SHARED_IRQ_BUSY"; - case 0xC000016D: return "STATUS_FT_ORPHANING"; - case 0xC000016E: return "STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT"; - case 0xC0000172: return "STATUS_PARTITION_FAILURE"; - case 0xC0000173: return "STATUS_INVALID_BLOCK_LENGTH"; - case 0xC0000174: return "STATUS_DEVICE_NOT_PARTITIONED"; - case 
0xC0000175: return "STATUS_UNABLE_TO_LOCK_MEDIA"; - case 0xC0000176: return "STATUS_UNABLE_TO_UNLOAD_MEDIA"; - case 0xC0000177: return "STATUS_EOM_OVERFLOW"; - case 0xC0000178: return "STATUS_NO_MEDIA"; - case 0xC000017A: return "STATUS_NO_SUCH_MEMBER"; - case 0xC000017B: return "STATUS_INVALID_MEMBER"; - case 0xC000017C: return "STATUS_KEY_DELETED"; - case 0xC000017D: return "STATUS_NO_LOG_SPACE"; - case 0xC000017E: return "STATUS_TOO_MANY_SIDS"; - case 0xC000017F: return "STATUS_LM_CROSS_ENCRYPTION_REQUIRED"; - case 0xC0000180: return "STATUS_KEY_HAS_CHILDREN"; - case 0xC0000181: return "STATUS_CHILD_MUST_BE_VOLATILE"; - case 0xC0000182: return "STATUS_DEVICE_CONFIGURATION_ERROR"; - case 0xC0000183: return "STATUS_DRIVER_INTERNAL_ERROR"; - case 0xC0000184: return "STATUS_INVALID_DEVICE_STATE"; - case 0xC0000185: return "STATUS_IO_DEVICE_ERROR"; - case 0xC0000186: return "STATUS_DEVICE_PROTOCOL_ERROR"; - case 0xC0000187: return "STATUS_BACKUP_CONTROLLER"; - case 0xC0000188: return "STATUS_LOG_FILE_FULL"; - case 0xC0000189: return "STATUS_TOO_LATE"; - case 0xC000018A: return "STATUS_NO_TRUST_LSA_SECRET"; - case 0xC000018B: return "STATUS_NO_TRUST_SAM_ACCOUNT"; - case 0xC000018C: return "STATUS_TRUSTED_DOMAIN_FAILURE"; - case 0xC000018D: return "STATUS_TRUSTED_RELATIONSHIP_FAILURE"; - case 0xC000018E: return "STATUS_EVENTLOG_FILE_CORRUPT"; - case 0xC000018F: return "STATUS_EVENTLOG_CANT_START"; - case 0xC0000190: return "STATUS_TRUST_FAILURE"; - case 0xC0000191: return "STATUS_MUTANT_LIMIT_EXCEEDED"; - case 0xC0000192: return "STATUS_NETLOGON_NOT_STARTED"; - case 0xC0000193: return "STATUS_ACCOUNT_EXPIRED"; - case 0xC0000194: return "STATUS_POSSIBLE_DEADLOCK"; - case 0xC0000195: return "STATUS_NETWORK_CREDENTIAL_CONFLICT"; - case 0xC0000196: return "STATUS_REMOTE_SESSION_LIMIT"; - case 0xC0000197: return "STATUS_EVENTLOG_FILE_CHANGED"; - case 0xC0000198: return "STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT"; - case 0xC0000199: return 
"STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT"; - case 0xC000019A: return "STATUS_NOLOGON_SERVER_TRUST_ACCOUNT"; - case 0xC000019B: return "STATUS_DOMAIN_TRUST_INCONSISTENT"; - case 0xC000019C: return "STATUS_FS_DRIVER_REQUIRED"; - case 0xC0000202: return "STATUS_NO_USER_SESSION_KEY"; - case 0xC0000203: return "STATUS_USER_SESSION_DELETED"; - case 0xC0000204: return "STATUS_RESOURCE_LANG_NOT_FOUND"; - case 0xC0000205: return "STATUS_INSUFF_SERVER_RESOURCES"; - case 0xC0000206: return "STATUS_INVALID_BUFFER_SIZE"; - case 0xC0000207: return "STATUS_INVALID_ADDRESS_COMPONENT"; - case 0xC0000208: return "STATUS_INVALID_ADDRESS_WILDCARD"; - case 0xC0000209: return "STATUS_TOO_MANY_ADDRESSES"; - case 0xC000020A: return "STATUS_ADDRESS_ALREADY_EXISTS"; - case 0xC000020B: return "STATUS_ADDRESS_CLOSED"; - case 0xC000020C: return "STATUS_CONNECTION_DISCONNECTED"; - case 0xC000020D: return "STATUS_CONNECTION_RESET"; - case 0xC000020E: return "STATUS_TOO_MANY_NODES"; - case 0xC000020F: return "STATUS_TRANSACTION_ABORTED"; - case 0xC0000210: return "STATUS_TRANSACTION_TIMED_OUT"; - case 0xC0000211: return "STATUS_TRANSACTION_NO_RELEASE"; - case 0xC0000212: return "STATUS_TRANSACTION_NO_MATCH"; - case 0xC0000213: return "STATUS_TRANSACTION_RESPONDED"; - case 0xC0000214: return "STATUS_TRANSACTION_INVALID_ID"; - case 0xC0000215: return "STATUS_TRANSACTION_INVALID_TYPE"; - case 0xC0000216: return "STATUS_NOT_SERVER_SESSION"; - case 0xC0000217: return "STATUS_NOT_CLIENT_SESSION"; - case 0xC0000218: return "STATUS_CANNOT_LOAD_REGISTRY_FILE"; - case 0xC0000219: return "STATUS_DEBUG_ATTACH_FAILED"; - case 0xC000021A: return "STATUS_SYSTEM_PROCESS_TERMINATED"; - case 0xC000021B: return "STATUS_DATA_NOT_ACCEPTED"; - case 0xC000021C: return "STATUS_NO_BROWSER_SERVERS_FOUND"; - case 0xC000021D: return "STATUS_VDM_HARD_ERROR"; - case 0xC000021E: return "STATUS_DRIVER_CANCEL_TIMEOUT"; - case 0xC000021F: return "STATUS_REPLY_MESSAGE_MISMATCH"; - case 0xC0000220: return 
"STATUS_MAPPED_ALIGNMENT"; - case 0xC0000221: return "STATUS_IMAGE_CHECKSUM_MISMATCH"; - case 0xC0000222: return "STATUS_LOST_WRITEBEHIND_DATA"; - case 0xC0000223: return "STATUS_CLIENT_SERVER_PARAMETERS_INVALID"; - case 0xC0000224: return "STATUS_PASSWORD_MUST_CHANGE"; - case 0xC0000225: return "STATUS_NOT_FOUND"; - case 0xC0000226: return "STATUS_NOT_TINY_STREAM"; - case 0xC0000227: return "STATUS_RECOVERY_FAILURE"; - case 0xC0000228: return "STATUS_STACK_OVERFLOW_READ"; - case 0xC0000229: return "STATUS_FAIL_CHECK"; - case 0xC000022A: return "STATUS_DUPLICATE_OBJECTID"; - case 0xC000022B: return "STATUS_OBJECTID_EXISTS"; - case 0xC000022C: return "STATUS_CONVERT_TO_LARGE"; - case 0xC000022D: return "STATUS_RETRY"; - case 0xC000022E: return "STATUS_FOUND_OUT_OF_SCOPE"; - case 0xC000022F: return "STATUS_ALLOCATE_BUCKET"; - case 0xC0000230: return "STATUS_PROPSET_NOT_FOUND"; - case 0xC0000231: return "STATUS_MARSHALL_OVERFLOW"; - case 0xC0000232: return "STATUS_INVALID_VARIANT"; - case 0xC0000233: return "STATUS_DOMAIN_CONTROLLER_NOT_FOUND"; - case 0xC0000234: return "STATUS_ACCOUNT_LOCKED_OUT"; - case 0xC0000235: return "STATUS_HANDLE_NOT_CLOSABLE"; - case 0xC0000236: return "STATUS_CONNECTION_REFUSED"; - case 0xC0000237: return "STATUS_GRACEFUL_DISCONNECT"; - case 0xC0000238: return "STATUS_ADDRESS_ALREADY_ASSOCIATED"; - case 0xC0000239: return "STATUS_ADDRESS_NOT_ASSOCIATED"; - case 0xC000023A: return "STATUS_CONNECTION_INVALID"; - case 0xC000023B: return "STATUS_CONNECTION_ACTIVE"; - case 0xC000023C: return "STATUS_NETWORK_UNREACHABLE"; - case 0xC000023D: return "STATUS_HOST_UNREACHABLE"; - case 0xC000023E: return "STATUS_PROTOCOL_UNREACHABLE"; - case 0xC000023F: return "STATUS_PORT_UNREACHABLE"; - case 0xC0000240: return "STATUS_REQUEST_ABORTED"; - case 0xC0000241: return "STATUS_CONNECTION_ABORTED"; - case 0xC0000242: return "STATUS_BAD_COMPRESSION_BUFFER"; - case 0xC0000243: return "STATUS_USER_MAPPED_FILE"; - case 0xC0000244: return "STATUS_AUDIT_FAILED"; - 
case 0xC0000245: return "STATUS_TIMER_RESOLUTION_NOT_SET"; - case 0xC0000246: return "STATUS_CONNECTION_COUNT_LIMIT"; - case 0xC0000247: return "STATUS_LOGIN_TIME_RESTRICTION"; - case 0xC0000248: return "STATUS_LOGIN_WKSTA_RESTRICTION"; - case 0xC0000249: return "STATUS_IMAGE_MP_UP_MISMATCH"; - case 0xC0000250: return "STATUS_INSUFFICIENT_LOGON_INFO"; - case 0xC0000251: return "STATUS_BAD_DLL_ENTRYPOINT"; - case 0xC0000252: return "STATUS_BAD_SERVICE_ENTRYPOINT"; - case 0xC0000253: return "STATUS_LPC_REPLY_LOST"; - case 0xC0000254: return "STATUS_IP_ADDRESS_CONFLICT1"; - case 0xC0000255: return "STATUS_IP_ADDRESS_CONFLICT2"; - case 0xC0000256: return "STATUS_REGISTRY_QUOTA_LIMIT"; - case 0xC0000257: return "STATUS_PATH_NOT_COVERED"; - case 0xC0000258: return "STATUS_NO_CALLBACK_ACTIVE"; - case 0xC0000259: return "STATUS_LICENSE_QUOTA_EXCEEDED"; - case 0xC000025A: return "STATUS_PWD_TOO_SHORT"; - case 0xC000025B: return "STATUS_PWD_TOO_RECENT"; - case 0xC000025C: return "STATUS_PWD_HISTORY_CONFLICT"; - case 0xC000025E: return "STATUS_PLUGPLAY_NO_DEVICE"; - case 0xC000025F: return "STATUS_UNSUPPORTED_COMPRESSION"; - case 0xC0000260: return "STATUS_INVALID_HW_PROFILE"; - case 0xC0000261: return "STATUS_INVALID_PLUGPLAY_DEVICE_PATH"; - case 0xC0000262: return "STATUS_DRIVER_ORDINAL_NOT_FOUND"; - case 0xC0000263: return "STATUS_DRIVER_ENTRYPOINT_NOT_FOUND"; - case 0xC0000264: return "STATUS_RESOURCE_NOT_OWNED"; - case 0xC0000265: return "STATUS_TOO_MANY_LINKS"; - case 0xC0000266: return "STATUS_QUOTA_LIST_INCONSISTENT"; - case 0xC0000267: return "STATUS_FILE_IS_OFFLINE"; - case 0xC0000268: return "STATUS_EVALUATION_EXPIRATION"; - case 0xC0000269: return "STATUS_ILLEGAL_DLL_RELOCATION"; - case 0xC000026A: return "STATUS_LICENSE_VIOLATION"; - case 0xC000026B: return "STATUS_DLL_INIT_FAILED_LOGOFF"; - case 0xC000026C: return "STATUS_DRIVER_UNABLE_TO_LOAD"; - case 0xC000026D: return "STATUS_DFS_UNAVAILABLE"; - case 0xC000026E: return "STATUS_VOLUME_DISMOUNTED"; - case 
0xC000026F: return "STATUS_WX86_INTERNAL_ERROR"; - case 0xC0000270: return "STATUS_WX86_FLOAT_STACK_CHECK"; - case 0xC0000271: return "STATUS_VALIDATE_CONTINUE"; - case 0xC0000272: return "STATUS_NO_MATCH"; - case 0xC0000273: return "STATUS_NO_MORE_MATCHES"; - case 0xC0000275: return "STATUS_NOT_A_REPARSE_POINT"; - case 0xC0000276: return "STATUS_IO_REPARSE_TAG_INVALID"; - case 0xC0000277: return "STATUS_IO_REPARSE_TAG_MISMATCH"; - case 0xC0000278: return "STATUS_IO_REPARSE_DATA_INVALID"; - case 0xC0000279: return "STATUS_IO_REPARSE_TAG_NOT_HANDLED"; - case 0xC0000280: return "STATUS_REPARSE_POINT_NOT_RESOLVED"; - case 0xC0000281: return "STATUS_DIRECTORY_IS_A_REPARSE_POINT"; - case 0xC0000282: return "STATUS_RANGE_LIST_CONFLICT"; - case 0xC0000283: return "STATUS_SOURCE_ELEMENT_EMPTY"; - case 0xC0000284: return "STATUS_DESTINATION_ELEMENT_FULL"; - case 0xC0000285: return "STATUS_ILLEGAL_ELEMENT_ADDRESS"; - case 0xC0000286: return "STATUS_MAGAZINE_NOT_PRESENT"; - case 0xC0000287: return "STATUS_REINITIALIZATION_NEEDED"; - case 0x80000288: return "STATUS_DEVICE_REQUIRES_CLEANING"; - case 0x80000289: return "STATUS_DEVICE_DOOR_OPEN"; - case 0xC000028A: return "STATUS_ENCRYPTION_FAILED"; - case 0xC000028B: return "STATUS_DECRYPTION_FAILED"; - case 0xC000028C: return "STATUS_RANGE_NOT_FOUND"; - case 0xC000028D: return "STATUS_NO_RECOVERY_POLICY"; - case 0xC000028E: return "STATUS_NO_EFS"; - case 0xC000028F: return "STATUS_WRONG_EFS"; - case 0xC0000290: return "STATUS_NO_USER_KEYS"; - case 0xC0000291: return "STATUS_FILE_NOT_ENCRYPTED"; - case 0xC0000292: return "STATUS_NOT_EXPORT_FORMAT"; - case 0xC0000293: return "STATUS_FILE_ENCRYPTED"; - case 0x40000294: return "STATUS_WAKE_SYSTEM"; - case 0xC0000295: return "STATUS_WMI_GUID_NOT_FOUND"; - case 0xC0000296: return "STATUS_WMI_INSTANCE_NOT_FOUND"; - case 0xC0000297: return "STATUS_WMI_ITEMID_NOT_FOUND"; - case 0xC0000298: return "STATUS_WMI_TRY_AGAIN"; - case 0xC0000299: return "STATUS_SHARED_POLICY"; - case 
0xC000029A: return "STATUS_POLICY_OBJECT_NOT_FOUND"; - case 0xC000029B: return "STATUS_POLICY_ONLY_IN_DS"; - case 0xC000029C: return "STATUS_VOLUME_NOT_UPGRADED"; - case 0xC000029D: return "STATUS_REMOTE_STORAGE_NOT_ACTIVE"; - case 0xC000029E: return "STATUS_REMOTE_STORAGE_MEDIA_ERROR"; - case 0xC000029F: return "STATUS_NO_TRACKING_SERVICE"; - case 0xC00002A0: return "STATUS_SERVER_SID_MISMATCH"; - case 0xC00002A1: return "STATUS_DS_NO_ATTRIBUTE_OR_VALUE"; - case 0xC00002A2: return "STATUS_DS_INVALID_ATTRIBUTE_SYNTAX"; - case 0xC00002A3: return "STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED"; - case 0xC00002A4: return "STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS"; - case 0xC00002A5: return "STATUS_DS_BUSY"; - case 0xC00002A6: return "STATUS_DS_UNAVAILABLE"; - case 0xC00002A7: return "STATUS_DS_NO_RIDS_ALLOCATED"; - case 0xC00002A8: return "STATUS_DS_NO_MORE_RIDS"; - case 0xC00002A9: return "STATUS_DS_INCORRECT_ROLE_OWNER"; - case 0xC00002AA: return "STATUS_DS_RIDMGR_INIT_ERROR"; - case 0xC00002AB: return "STATUS_DS_OBJ_CLASS_VIOLATION"; - case 0xC00002AC: return "STATUS_DS_CANT_ON_NON_LEAF"; - case 0xC00002AD: return "STATUS_DS_CANT_ON_RDN"; - case 0xC00002AE: return "STATUS_DS_CANT_MOD_OBJ_CLASS"; - case 0xC00002AF: return "STATUS_DS_CROSS_DOM_MOVE_FAILED"; - case 0xC00002B0: return "STATUS_DS_GC_NOT_AVAILABLE"; - case 0xC00002B1: return "STATUS_DIRECTORY_SERVICE_REQUIRED"; - case 0xC00002B2: return "STATUS_REPARSE_ATTRIBUTE_CONFLICT"; - case 0xC00002B3: return "STATUS_CANT_ENABLE_DENY_ONLY"; - case 0xC00002B4: return "STATUS_FLOAT_MULTIPLE_FAULTS"; - case 0xC00002B5: return "STATUS_FLOAT_MULTIPLE_TRAPS"; - case 0xC00002B6: return "STATUS_DEVICE_REMOVED"; - case 0xC00002B7: return "STATUS_JOURNAL_DELETE_IN_PROGRESS"; - case 0xC00002B8: return "STATUS_JOURNAL_NOT_ACTIVE"; - case 0xC00002B9: return "STATUS_NOINTERFACE"; - case 0xC00002C1: return "STATUS_DS_ADMIN_LIMIT_EXCEEDED"; - case 0xC00002C2: return "STATUS_DRIVER_FAILED_SLEEP"; - case 0xC00002C3: return 
"STATUS_MUTUAL_AUTHENTICATION_FAILED"; - case 0xC00002C4: return "STATUS_CORRUPT_SYSTEM_FILE"; - case 0xC00002C5: return "STATUS_DATATYPE_MISALIGNMENT_ERROR"; - case 0xC00002C6: return "STATUS_WMI_READ_ONLY"; - case 0xC00002C7: return "STATUS_WMI_SET_FAILURE"; - case 0xC00002C8: return "STATUS_COMMITMENT_MINIMUM"; - case 0xC00002C9: return "STATUS_REG_NAT_CONSUMPTION"; - case 0xC00002CA: return "STATUS_TRANSPORT_FULL"; - case 0xC00002CB: return "STATUS_DS_SAM_INIT_FAILURE"; - case 0xC00002CC: return "STATUS_ONLY_IF_CONNECTED"; - case 0xC00002CD: return "STATUS_DS_SENSITIVE_GROUP_VIOLATION"; - case 0xC00002CE: return "STATUS_PNP_RESTART_ENUMERATION"; - case 0xC00002CF: return "STATUS_JOURNAL_ENTRY_DELETED"; - case 0xC00002D0: return "STATUS_DS_CANT_MOD_PRIMARYGROUPID"; - case 0xC00002D1: return "STATUS_SYSTEM_IMAGE_BAD_SIGNATURE"; - case 0xC00002D2: return "STATUS_PNP_REBOOT_REQUIRED"; - case 0xC00002D3: return "STATUS_POWER_STATE_INVALID"; - case 0xC00002D4: return "STATUS_DS_INVALID_GROUP_TYPE"; - case 0xC00002D5: return "STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN"; - case 0xC00002D6: return "STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN"; - case 0xC00002D7: return "STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER"; - case 0xC00002D8: return "STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER"; - case 0xC00002D9: return "STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER"; - case 0xC00002DA: return "STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER"; - case 0xC00002DB: return "STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER"; - case 0xC00002DC: return "STATUS_DS_HAVE_PRIMARY_MEMBERS"; - case 0xC00002DD: return "STATUS_WMI_NOT_SUPPORTED"; - case 0xC00002DE: return "STATUS_INSUFFICIENT_POWER"; - case 0xC00002DF: return "STATUS_SAM_NEED_BOOTKEY_PASSWORD"; - case 0xC00002E0: return "STATUS_SAM_NEED_BOOTKEY_FLOPPY"; - case 0xC00002E1: return "STATUS_DS_CANT_START"; - case 0xC00002E2: return "STATUS_DS_INIT_FAILURE"; - case 0xC00002E3: return "STATUS_SAM_INIT_FAILURE"; - case 0xC00002E4: return 
"STATUS_DS_GC_REQUIRED"; - case 0xC00002E5: return "STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY"; - case 0xC00002E6: return "STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS"; - case 0xC00002E7: return "STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED"; - case 0xC00002E8: return "STATUS_MULTIPLE_FAULT_VIOLATION"; - case 0xC0000300: return "STATUS_NOT_SUPPORTED_ON_SBS"; - case 0xC0009898: return "STATUS_WOW_ASSERTION"; - case 0xC0010001: return "DBG_NO_STATE_CHANGE"; - case 0xC0010002: return "DBG_APP_NOT_IDLE"; - case 0xC0020001: return "RPC_NT_INVALID_STRING_BINDING"; - case 0xC0020002: return "RPC_NT_WRONG_KIND_OF_BINDING"; - case 0xC0020003: return "RPC_NT_INVALID_BINDING"; - case 0xC0020004: return "RPC_NT_PROTSEQ_NOT_SUPPORTED"; - case 0xC0020005: return "RPC_NT_INVALID_RPC_PROTSEQ"; - case 0xC0020006: return "RPC_NT_INVALID_STRING_UUID"; - case 0xC0020007: return "RPC_NT_INVALID_ENDPOINT_FORMAT"; - case 0xC0020008: return "RPC_NT_INVALID_NET_ADDR"; - case 0xC0020009: return "RPC_NT_NO_ENDPOINT_FOUND"; - case 0xC002000A: return "RPC_NT_INVALID_TIMEOUT"; - case 0xC002000B: return "RPC_NT_OBJECT_NOT_FOUND"; - case 0xC002000C: return "RPC_NT_ALREADY_REGISTERED"; - case 0xC002000D: return "RPC_NT_TYPE_ALREADY_REGISTERED"; - case 0xC002000E: return "RPC_NT_ALREADY_LISTENING"; - case 0xC002000F: return "RPC_NT_NO_PROTSEQS_REGISTERED"; - case 0xC0020010: return "RPC_NT_NOT_LISTENING"; - case 0xC0020011: return "RPC_NT_UNKNOWN_MGR_TYPE"; - case 0xC0020012: return "RPC_NT_UNKNOWN_IF"; - case 0xC0020013: return "RPC_NT_NO_BINDINGS"; - case 0xC0020014: return "RPC_NT_NO_PROTSEQS"; - case 0xC0020015: return "RPC_NT_CANT_CREATE_ENDPOINT"; - case 0xC0020016: return "RPC_NT_OUT_OF_RESOURCES"; - case 0xC0020017: return "RPC_NT_SERVER_UNAVAILABLE"; - case 0xC0020018: return "RPC_NT_SERVER_TOO_BUSY"; - case 0xC0020019: return "RPC_NT_INVALID_NETWORK_OPTIONS"; - case 0xC002001A: return "RPC_NT_NO_CALL_ACTIVE"; - case 0xC002001B: return "RPC_NT_CALL_FAILED"; - case 0xC002001C: return 
"RPC_NT_CALL_FAILED_DNE"; - case 0xC002001D: return "RPC_NT_PROTOCOL_ERROR"; - case 0xC002001F: return "RPC_NT_UNSUPPORTED_TRANS_SYN"; - case 0xC0020021: return "RPC_NT_UNSUPPORTED_TYPE"; - case 0xC0020022: return "RPC_NT_INVALID_TAG"; - case 0xC0020023: return "RPC_NT_INVALID_BOUND"; - case 0xC0020024: return "RPC_NT_NO_ENTRY_NAME"; - case 0xC0020025: return "RPC_NT_INVALID_NAME_SYNTAX"; - case 0xC0020026: return "RPC_NT_UNSUPPORTED_NAME_SYNTAX"; - case 0xC0020028: return "RPC_NT_UUID_NO_ADDRESS"; - case 0xC0020029: return "RPC_NT_DUPLICATE_ENDPOINT"; - case 0xC002002A: return "RPC_NT_UNKNOWN_AUTHN_TYPE"; - case 0xC002002B: return "RPC_NT_MAX_CALLS_TOO_SMALL"; - case 0xC002002C: return "RPC_NT_STRING_TOO_LONG"; - case 0xC002002D: return "RPC_NT_PROTSEQ_NOT_FOUND"; - case 0xC002002E: return "RPC_NT_PROCNUM_OUT_OF_RANGE"; - case 0xC002002F: return "RPC_NT_BINDING_HAS_NO_AUTH"; - case 0xC0020030: return "RPC_NT_UNKNOWN_AUTHN_SERVICE"; - case 0xC0020031: return "RPC_NT_UNKNOWN_AUTHN_LEVEL"; - case 0xC0020032: return "RPC_NT_INVALID_AUTH_IDENTITY"; - case 0xC0020033: return "RPC_NT_UNKNOWN_AUTHZ_SERVICE"; - case 0xC0020034: return "EPT_NT_INVALID_ENTRY"; - case 0xC0020035: return "EPT_NT_CANT_PERFORM_OP"; - case 0xC0020036: return "EPT_NT_NOT_REGISTERED"; - case 0xC0020037: return "RPC_NT_NOTHING_TO_EXPORT"; - case 0xC0020038: return "RPC_NT_INCOMPLETE_NAME"; - case 0xC0020039: return "RPC_NT_INVALID_VERS_OPTION"; - case 0xC002003A: return "RPC_NT_NO_MORE_MEMBERS"; - case 0xC002003B: return "RPC_NT_NOT_ALL_OBJS_UNEXPORTED"; - case 0xC002003C: return "RPC_NT_INTERFACE_NOT_FOUND"; - case 0xC002003D: return "RPC_NT_ENTRY_ALREADY_EXISTS"; - case 0xC002003E: return "RPC_NT_ENTRY_NOT_FOUND"; - case 0xC002003F: return "RPC_NT_NAME_SERVICE_UNAVAILABLE"; - case 0xC0020040: return "RPC_NT_INVALID_NAF_ID"; - case 0xC0020041: return "RPC_NT_CANNOT_SUPPORT"; - case 0xC0020042: return "RPC_NT_NO_CONTEXT_AVAILABLE"; - case 0xC0020043: return "RPC_NT_INTERNAL_ERROR"; - case 
0xC0020044: return "RPC_NT_ZERO_DIVIDE"; - case 0xC0020045: return "RPC_NT_ADDRESS_ERROR"; - case 0xC0020046: return "RPC_NT_FP_DIV_ZERO"; - case 0xC0020047: return "RPC_NT_FP_UNDERFLOW"; - case 0xC0020048: return "RPC_NT_FP_OVERFLOW"; - case 0xC0030001: return "RPC_NT_NO_MORE_ENTRIES"; - case 0xC0030002: return "RPC_NT_SS_CHAR_TRANS_OPEN_FAIL"; - case 0xC0030003: return "RPC_NT_SS_CHAR_TRANS_SHORT_FILE"; - case 0xC0030004: return "RPC_NT_SS_IN_NULL_CONTEXT"; - case 0xC0030005: return "RPC_NT_SS_CONTEXT_MISMATCH"; - case 0xC0030006: return "RPC_NT_SS_CONTEXT_DAMAGED"; - case 0xC0030007: return "RPC_NT_SS_HANDLES_MISMATCH"; - case 0xC0030008: return "RPC_NT_SS_CANNOT_GET_CALL_HANDLE"; - case 0xC0030009: return "RPC_NT_NULL_REF_POINTER"; - case 0xC003000A: return "RPC_NT_ENUM_VALUE_OUT_OF_RANGE"; - case 0xC003000B: return "RPC_NT_BYTE_COUNT_TOO_SMALL"; - case 0xC003000C: return "RPC_NT_BAD_STUB_DATA"; - case 0xC0020049: return "RPC_NT_CALL_IN_PROGRESS"; - case 0xC002004A: return "RPC_NT_NO_MORE_BINDINGS"; - case 0xC002004B: return "RPC_NT_GROUP_MEMBER_NOT_FOUND"; - case 0xC002004C: return "EPT_NT_CANT_CREATE"; - case 0xC002004D: return "RPC_NT_INVALID_OBJECT"; - case 0xC002004F: return "RPC_NT_NO_INTERFACES"; - case 0xC0020050: return "RPC_NT_CALL_CANCELLED"; - case 0xC0020051: return "RPC_NT_BINDING_INCOMPLETE"; - case 0xC0020052: return "RPC_NT_COMM_FAILURE"; - case 0xC0020053: return "RPC_NT_UNSUPPORTED_AUTHN_LEVEL"; - case 0xC0020054: return "RPC_NT_NO_PRINC_NAME"; - case 0xC0020055: return "RPC_NT_NOT_RPC_ERROR"; - case 0x40020056: return "RPC_NT_UUID_LOCAL_ONLY"; - case 0xC0020057: return "RPC_NT_SEC_PKG_ERROR"; - case 0xC0020058: return "RPC_NT_NOT_CANCELLED"; - case 0xC0030059: return "RPC_NT_INVALID_ES_ACTION"; - case 0xC003005A: return "RPC_NT_WRONG_ES_VERSION"; - case 0xC003005B: return "RPC_NT_WRONG_STUB_VERSION"; - case 0xC003005C: return "RPC_NT_INVALID_PIPE_OBJECT"; - case 0xC003005D: return "RPC_NT_INVALID_PIPE_OPERATION"; - case 0xC003005E: return 
"RPC_NT_WRONG_PIPE_VERSION"; - case 0xC003005F: return "RPC_NT_PIPE_CLOSED"; - case 0xC0030060: return "RPC_NT_PIPE_DISCIPLINE_ERROR"; - case 0xC0030061: return "RPC_NT_PIPE_EMPTY"; - case 0xC0020062: return "RPC_NT_INVALID_ASYNC_HANDLE"; - case 0xC0020063: return "RPC_NT_INVALID_ASYNC_CALL"; - case 0x400200AF: return "RPC_NT_SEND_INCOMPLETE"; - case 0xC0140001: return "STATUS_ACPI_INVALID_OPCODE"; - case 0xC0140002: return "STATUS_ACPI_STACK_OVERFLOW"; - case 0xC0140003: return "STATUS_ACPI_ASSERT_FAILED"; - case 0xC0140004: return "STATUS_ACPI_INVALID_INDEX"; - case 0xC0140005: return "STATUS_ACPI_INVALID_ARGUMENT"; - case 0xC0140006: return "STATUS_ACPI_FATAL"; - case 0xC0140007: return "STATUS_ACPI_INVALID_SUPERNAME"; - case 0xC0140008: return "STATUS_ACPI_INVALID_ARGTYPE"; - case 0xC0140009: return "STATUS_ACPI_INVALID_OBJTYPE"; - case 0xC014000A: return "STATUS_ACPI_INVALID_TARGETTYPE"; - case 0xC014000B: return "STATUS_ACPI_INCORRECT_ARGUMENT_COUNT"; - case 0xC014000C: return "STATUS_ACPI_ADDRESS_NOT_MAPPED"; - case 0xC014000D: return "STATUS_ACPI_INVALID_EVENTTYPE"; - case 0xC014000E: return "STATUS_ACPI_HANDLER_COLLISION"; - case 0xC014000F: return "STATUS_ACPI_INVALID_DATA"; - case 0xC0140010: return "STATUS_ACPI_INVALID_REGION"; - case 0xC0140011: return "STATUS_ACPI_INVALID_ACCESS_SIZE"; - case 0xC0140012: return "STATUS_ACPI_ACQUIRE_GLOBAL_LOCK"; - case 0xC0140013: return "STATUS_ACPI_ALREADY_INITIALIZED"; - case 0xC0140014: return "STATUS_ACPI_NOT_INITIALIZED"; - case 0xC0140015: return "STATUS_ACPI_INVALID_MUTEX_LEVEL"; - case 0xC0140016: return "STATUS_ACPI_MUTEX_NOT_OWNED"; - case 0xC0140017: return "STATUS_ACPI_MUTEX_NOT_OWNER"; - case 0xC0140018: return "STATUS_ACPI_RS_ACCESS"; - case 0xC0140019: return "STATUS_ACPI_INVALID_TABLE"; - case 0xC0140020: return "STATUS_ACPI_REG_HANDLER_FAILED"; - case 0xC0140021: return "STATUS_ACPI_POWER_REQUEST_FAILED"; - case 0xC00A0001: return "STATUS_CTX_WINSTATION_NAME_INVALID"; - case 0xC00A0002: return 
"STATUS_CTX_INVALID_PD"; - case 0xC00A0003: return "STATUS_CTX_PD_NOT_FOUND"; - case 0x400A0004: return "STATUS_CTX_CDM_CONNECT"; - case 0x400A0005: return "STATUS_CTX_CDM_DISCONNECT"; - case 0xC00A0006: return "STATUS_CTX_CLOSE_PENDING"; - case 0xC00A0007: return "STATUS_CTX_NO_OUTBUF"; - case 0xC00A0008: return "STATUS_CTX_MODEM_INF_NOT_FOUND"; - case 0xC00A0009: return "STATUS_CTX_INVALID_MODEMNAME"; - case 0xC00A000A: return "STATUS_CTX_RESPONSE_ERROR"; - case 0xC00A000B: return "STATUS_CTX_MODEM_RESPONSE_TIMEOUT"; - case 0xC00A000C: return "STATUS_CTX_MODEM_RESPONSE_NO_CARRIER"; - case 0xC00A000D: return "STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE"; - case 0xC00A000E: return "STATUS_CTX_MODEM_RESPONSE_BUSY"; - case 0xC00A000F: return "STATUS_CTX_MODEM_RESPONSE_VOICE"; - case 0xC00A0010: return "STATUS_CTX_TD_ERROR"; - case 0xC00A0012: return "STATUS_CTX_LICENSE_CLIENT_INVALID"; - case 0xC00A0013: return "STATUS_CTX_LICENSE_NOT_AVAILABLE"; - case 0xC00A0014: return "STATUS_CTX_LICENSE_EXPIRED"; - case 0xC00A0015: return "STATUS_CTX_WINSTATION_NOT_FOUND"; - case 0xC00A0016: return "STATUS_CTX_WINSTATION_NAME_COLLISION"; - case 0xC00A0017: return "STATUS_CTX_WINSTATION_BUSY"; - case 0xC00A0018: return "STATUS_CTX_BAD_VIDEO_MODE"; - case 0xC00A0022: return "STATUS_CTX_GRAPHICS_INVALID"; - case 0xC00A0024: return "STATUS_CTX_NOT_CONSOLE"; - case 0xC00A0026: return "STATUS_CTX_CLIENT_QUERY_TIMEOUT"; - case 0xC00A0027: return "STATUS_CTX_CONSOLE_DISCONNECT"; - case 0xC00A0028: return "STATUS_CTX_CONSOLE_CONNECT"; - case 0xC00A002A: return "STATUS_CTX_SHADOW_DENIED"; - case 0xC00A002B: return "STATUS_CTX_WINSTATION_ACCESS_DENIED"; - case 0xC00A002E: return "STATUS_CTX_INVALID_WD"; - case 0xC00A002F: return "STATUS_CTX_WD_NOT_FOUND"; - case 0xC00A0030: return "STATUS_CTX_SHADOW_INVALID"; - case 0xC00A0031: return "STATUS_CTX_SHADOW_DISABLED"; - case 0xC00A0032: return "STATUS_RDP_PROTOCOL_ERROR"; - case 0xC00A0033: return "STATUS_CTX_CLIENT_LICENSE_NOT_SET"; - case 
0xC00A0034: return "STATUS_CTX_CLIENT_LICENSE_IN_USE"; - case 0xC0040035: return "STATUS_PNP_BAD_MPS_TABLE"; - case 0xC0040036: return "STATUS_PNP_TRANSLATION_FAILED"; - case 0xC0040037: return "STATUS_PNP_IRQ_TRANSLATION_FAILED"; - default: return "STATUS_UNKNOWN"; - } -} - - -/* - * KsPrintf - * This function is variable-argument, level-sensitive debug print routine. - * If the specified debug level for the print statement is lower or equal - * to the current debug level, the message will be printed. - * - * Arguments: - * DebugPrintLevel - Specifies at which debugging level the string should - * be printed - * DebugMessage - Variable argument ascii c string - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -VOID -KsPrintf( - LONG DebugPrintLevel, - PCHAR DebugMessage, - ... - ) -{ - va_list ap; - - va_start(ap, DebugMessage); - - if (DebugPrintLevel <= KsDebugLevel) - { - CHAR buffer[0x200]; - - vsprintf(buffer, DebugMessage, ap); - - KdPrint(("TID:%8.8x: %s", PsGetCurrentThread(), buffer)); - } - - va_end(ap); - -} // KsPrint() - -#endif diff --git a/lnet/libcfs/winnt/winnt-fs.c b/lnet/libcfs/winnt/winnt-fs.c deleted file mode 100644 index 128781bffb2cb4c9726c2100f0cb41f6dce704f6..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-fs.c +++ /dev/null @@ -1,541 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - -const CHAR *dos_file_prefix = "\\??\\"; - -/* - * cfs_filp_open - * To open or create a file in kernel mode - * - * Arguments: - * name: name of the file to be opened or created, no dos path prefix - * flags: open/creation attribute options - * mode: access mode/permission to open or create - * err: error code - * - * Return Value: - * the pointer to the cfs_file_t or NULL if it fails - * - * Notes: - * N/A - */ - -cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) -{ - cfs_file_t * fp = NULL; - - NTSTATUS Status; - - OBJECT_ATTRIBUTES ObjectAttributes; - HANDLE FileHandle; - IO_STATUS_BLOCK IoStatus; - ACCESS_MASK DesiredAccess; - ULONG CreateDisposition; - ULONG ShareAccess; - ULONG CreateOptions; - - USHORT NameLength = 0; - USHORT PrefixLength = 0; - - UNICODE_STRING UnicodeName; - PWCHAR UnicodeString = NULL; - - ANSI_STRING AnsiName; - PUCHAR AnsiString = NULL; - - /* Analyze the flags settings */ - - if (cfs_is_flag_set(flags, O_WRONLY)) { - DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = 0; - } else if (cfs_is_flag_set(flags, O_RDWR)) { - DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE; - } else { - DesiredAccess = (GENERIC_READ | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ; - } - - if (cfs_is_flag_set(flags, O_CREAT)) { - if (cfs_is_flag_set(flags, O_EXCL)) { - CreateDisposition = FILE_CREATE; - } else { - CreateDisposition = FILE_OPEN_IF; - } - } else { - CreateDisposition = FILE_OPEN; - } - - if (cfs_is_flag_set(flags, O_TRUNC)) { - if (cfs_is_flag_set(flags, O_EXCL)) { - CreateDisposition = FILE_OVERWRITE; - } else { - 
CreateDisposition = FILE_OVERWRITE_IF; - } - } - - CreateOptions = 0; - - if (cfs_is_flag_set(flags, O_DIRECTORY)) { - cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE); - } - - if (cfs_is_flag_set(flags, O_SYNC)) { - cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH); - } - - if (cfs_is_flag_set(flags, O_DIRECT)) { - cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING); - } - - /* Initialize the unicode path name for the specified file */ - - NameLength = (USHORT)strlen(name); - - if (name[0] != '\\') { - PrefixLength = (USHORT)strlen(dos_file_prefix); - } - - AnsiString = cfs_alloc( sizeof(CHAR) * (NameLength + PrefixLength + 1), - CFS_ALLOC_ZERO); - if (NULL == AnsiString) { - if (err) *err = -ENOMEM; - return NULL; - } - - UnicodeString = cfs_alloc( sizeof(WCHAR) * (NameLength + PrefixLength + 1), - CFS_ALLOC_ZERO); - - if (NULL == UnicodeString) { - if (err) *err = -ENOMEM; - cfs_free(AnsiString); - return NULL; - } - - if (PrefixLength) { - RtlCopyMemory(&AnsiString[0], dos_file_prefix , PrefixLength); - } - - RtlCopyMemory(&AnsiString[PrefixLength], name, NameLength); - NameLength += PrefixLength; - - AnsiName.MaximumLength = NameLength + 1; - AnsiName.Length = NameLength; - AnsiName.Buffer = AnsiString; - - UnicodeName.MaximumLength = (NameLength + 1) * sizeof(WCHAR); - UnicodeName.Length = 0; - UnicodeName.Buffer = (PWSTR)UnicodeString; - - RtlAnsiStringToUnicodeString(&UnicodeName, &AnsiName, FALSE); - - /* Setup the object attributes structure for the file. */ - - InitializeObjectAttributes( - &ObjectAttributes, - &UnicodeName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL ); - - /* Now to open or create the file now */ - - Status = ZwCreateFile( - &FileHandle, - DesiredAccess, - &ObjectAttributes, - &IoStatus, - 0, - FILE_ATTRIBUTE_NORMAL, - ShareAccess, - CreateDisposition, - CreateOptions, - NULL, - 0 ); - - /* Check the returned status of IoStatus... 
*/ - - if (!NT_SUCCESS(IoStatus.Status)) { - *err = cfs_error_code(IoStatus.Status); - cfs_free(UnicodeString); - cfs_free(AnsiString); - return NULL; - } - - /* Allocate the cfs_file_t: libcfs file object */ - - fp = cfs_alloc(sizeof(cfs_file_t) + NameLength, CFS_ALLOC_ZERO); - - if (NULL == fp) { - Status = ZwClose(FileHandle); - ASSERT(NT_SUCCESS(Status)); - *err = -ENOMEM; - cfs_free(UnicodeString); - cfs_free(AnsiString); - return NULL; - } - - fp->f_handle = FileHandle; - strcpy(fp->f_name, name); - fp->f_flags = flags; - fp->f_mode = (mode_t)mode; - fp->f_count = 1; - *err = 0; - - /* free the memory of temporary name strings */ - cfs_free(UnicodeString); - cfs_free(AnsiString); - - return fp; -} - - -/* - * cfs_filp_close - * To close the opened file and release the filp structure - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * ZERO: on success - * Non-Zero: on failure - * - * Notes: - * N/A - */ - -int cfs_filp_close(cfs_file_t *fp) -{ - NTSTATUS Status; - - ASSERT(fp != NULL); - ASSERT(fp->f_handle != NULL); - - /* release the file handle */ - Status = ZwClose(fp->f_handle); - ASSERT(NT_SUCCESS(Status)); - - /* free the file flip structure */ - cfs_free(fp); - return 0; -} - - -/* - * cfs_filp_read - * To read data from the opened file - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * buf: pointer to the buffer to contain the data - * nbytes: size in bytes to be read from the file - * pos: offset in file where reading starts, if pos - * NULL, then read from current file offset - * - * Return Value: - * Actual size read into the buffer in success case - * Error code in failure case - * - * Notes: - * N/A - */ - -int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) -{ - LARGE_INTEGER address; - NTSTATUS Status; - IO_STATUS_BLOCK IoStatus; - - int rc = 0; - - /* Read data from the file into the specified buffer */ - - if (pos != NULL) { - address.QuadPart = *pos; - } else { - 
address.QuadPart = fp->f_pos; - } - - Status = ZwReadFile( fp->f_handle, - 0, - NULL, - NULL, - &IoStatus, - buf, - nbytes, - &address, - NULL ); - - if (!NT_SUCCESS(IoStatus.Status)) { - rc = cfs_error_code(IoStatus.Status); - } else { - rc = (int)IoStatus.Information; - fp->f_pos = address.QuadPart + rc; - - if (pos != NULL) { - *pos = fp->f_pos; - } - } - - return rc; -} - - -/* - * cfs_filp_wrtie - * To write specified data to the opened file - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * buf: pointer to the buffer containing the data - * nbytes: size in bytes to be written to the file - * pos: offset in file where writing starts, if pos - * NULL, then write to current file offset - * - * Return Value: - * Actual size written into the buffer in success case - * Error code in failure case - * - * Notes: - * N/A - */ - -int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) -{ - LARGE_INTEGER address; - NTSTATUS Status; - IO_STATUS_BLOCK IoStatus; - int rc = 0; - - /* Write user specified data into the file */ - - if (pos != NULL) { - address.QuadPart = *pos; - } else { - address.QuadPart = fp->f_pos; - } - - Status = ZwWriteFile( fp->f_handle, - 0, - NULL, - NULL, - &IoStatus, - buf, - nbytes, - &address, - NULL ); - - if (!NT_SUCCESS(Status)) { - rc = cfs_error_code(Status); - } else { - rc = (int)IoStatus.Information; - fp->f_pos = address.QuadPart + rc; - - if (pos != NULL) { - *pos = fp->f_pos; - } - } - - return rc; -} - - -NTSTATUS -CompletionRoutine( - PDEVICE_OBJECT DeviceObject, - PIRP Irp, - PVOID Context) -{ - /* copy the IoStatus result */ - *Irp->UserIosb = Irp->IoStatus; - - /* singal the event we set */ - KeSetEvent(Irp->UserEvent, 0, FALSE); - - /* free the Irp we allocated */ - IoFreeIrp(Irp); - - return STATUS_MORE_PROCESSING_REQUIRED; -} - - -/* - * cfs_filp_fsync - * To sync the dirty data of the file to disk - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * 
Zero: in success case - * Error code: in failure case - * - * Notes: - * Nt kernel doesn't export such a routine to flush a file, - * we must allocate our own Irp and issue it to the file - * system driver. - */ - -int cfs_filp_fsync(cfs_file_t *fp) -{ - - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - - NTSTATUS Status; - PIRP Irp; - KEVENT Event; - IO_STATUS_BLOCK IoSb; - PIO_STACK_LOCATION IrpSp; - - /* get the FileObject and the DeviceObject */ - - Status = ObReferenceObjectByHandle( - fp->f_handle, - FILE_WRITE_DATA, - NULL, - KernelMode, - (PVOID*)&FileObject, - NULL ); - - if (!NT_SUCCESS(Status)) { - return cfs_error_code(Status); - } - - DeviceObject = IoGetRelatedDeviceObject(FileObject); - - /* allocate a new Irp */ - - Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); - - if (!Irp) { - - ObDereferenceObject(FileObject); - return -ENOMEM; - } - - /* intialize the event */ - KeInitializeEvent(&Event, SynchronizationEvent, FALSE); - - /* setup the Irp */ - Irp->UserEvent = &Event; - Irp->UserIosb = &IoSb; - Irp->RequestorMode = KernelMode; - - Irp->Tail.Overlay.Thread = PsGetCurrentThread(); - Irp->Tail.Overlay.OriginalFileObject = FileObject; - - /* setup the Irp stack location */ - IrpSp = IoGetNextIrpStackLocation(Irp); - - IrpSp->MajorFunction = IRP_MJ_FLUSH_BUFFERS; - IrpSp->DeviceObject = DeviceObject; - IrpSp->FileObject = FileObject; - - IoSetCompletionRoutine(Irp, CompletionRoutine, 0, TRUE, TRUE, TRUE); - - - /* issue the Irp to the underlying file system driver */ - IoCallDriver(DeviceObject, Irp); - - /* wait until it is finished */ - KeWaitForSingleObject(&Event, Executive, KernelMode, TRUE, 0); - - /* cleanup our reference on it */ - ObDereferenceObject(FileObject); - - Status = IoSb.Status; - - return cfs_error_code(Status); -} - -/* - * cfs_get_file - * To increase the reference of the file object - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * Zero: in success case - * Non-Zero: in 
failure case - * - * Notes: - * N/A - */ - -int cfs_get_file(cfs_file_t *fp) -{ - InterlockedIncrement(&(fp->f_count)); - return 0; -} - - -/* - * cfs_put_file - * To decrease the reference of the file object - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * Zero: in success case - * Non-Zero: in failure case - * - * Notes: - * N/A - */ - -int cfs_put_file(cfs_file_t *fp) -{ - if (InterlockedDecrement(&(fp->f_count)) == 0) { - cfs_filp_close(fp); - } - - return 0; -} - - -/* - * cfs_file_count - * To query the reference count of the file object - * - * Arguments: - * fp: the pointer of the cfs_file_t strcture - * - * Return Value: - * the reference count of the file object - * - * Notes: - * N/A - */ - -int cfs_file_count(cfs_file_t *fp) -{ - return (int)(fp->f_count); -} diff --git a/lnet/libcfs/winnt/winnt-lock.c b/lnet/libcfs/winnt/winnt-lock.c deleted file mode 100644 index 12dbc67ab48b869095f59167db5317f1454b13ea..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-lock.c +++ /dev/null @@ -1,353 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. 
- */ - - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - - -#if _X86_ - -void __declspec (naked) FASTCALL -atomic_add( - int i, - atomic_t *v - ) -{ - // ECX = i - // EDX = v ; [EDX][0] = v->counter - - __asm { - lock add dword ptr [edx][0], ecx - ret - } -} - -void __declspec (naked) FASTCALL -atomic_sub( - int i, - atomic_t *v - ) -{ - // ECX = i - // EDX = v ; [EDX][0] = v->counter - - __asm { - lock sub dword ptr [edx][0], ecx - ret - } -} - -void __declspec (naked) FASTCALL -atomic_inc( - atomic_t *v - ) -{ - //InterlockedIncrement((PULONG)(&((v)->counter))); - - //` ECX = v ; [ECX][0] = v->counter - - __asm { - lock inc dword ptr [ecx][0] - ret - } -} - -void __declspec (naked) FASTCALL -atomic_dec( - atomic_t *v - ) -{ - // ECX = v ; [ECX][0] = v->counter - - __asm { - lock dec dword ptr [ecx][0] - ret - } -} - -int __declspec (naked) FASTCALL -atomic_sub_and_test( - int i, - atomic_t *v - ) -{ - - // ECX = i - // EDX = v ; [EDX][0] = v->counter - - __asm { - xor eax, eax - lock sub dword ptr [edx][0], ecx - sete al - ret - } -} - -int __declspec (naked) FASTCALL -atomic_inc_and_test( - atomic_t *v - ) -{ - // ECX = v ; [ECX][0] = v->counter - - __asm { - xor eax, eax - lock inc dword ptr [ecx][0] - sete al - ret - } -} - -int __declspec (naked) FASTCALL -atomic_dec_and_test( - atomic_t *v - ) -{ - // ECX = v ; [ECX][0] = v->counter - - __asm { - xor eax, eax - lock dec dword ptr [ecx][0] - sete al - ret - } -} - -#else - -void FASTCALL -atomic_add( - int i, - atomic_t *v - ) -{ - InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (i)); -} - -void FASTCALL -atomic_sub( - int i, - atomic_t *v - ) -{ - InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (-1*i)); -} - -void FASTCALL -atomic_inc( - atomic_t *v - ) -{ - InterlockedIncrement((PULONG)(&((v)->counter))); -} - -void FASTCALL -atomic_dec( - atomic_t *v - ) -{ - InterlockedDecrement((PULONG)(&((v)->counter))); -} - -int FASTCALL -atomic_sub_and_test( - int i, - atomic_t 
*v - ) -{ - int counter, result; - - do { - - counter = v->counter; - result = counter - i; - - } while ( InterlockedCompareExchange( - &(v->counter), - result, - counter) != counter); - - return (result == 0); -} - -int FASTCALL -atomic_inc_and_test( - atomic_t *v - ) -{ - int counter, result; - - do { - - counter = v->counter; - result = counter + 1; - - } while ( InterlockedCompareExchange( - &(v->counter), - result, - counter) != counter); - - return (result == 0); -} - -int FASTCALL -atomic_dec_and_test( - atomic_t *v - ) -{ - int counter, result; - - do { - - counter = v->counter; - result = counter + 1; - - } while ( InterlockedCompareExchange( - &(v->counter), - result, - counter) != counter); - - return (result == 0); -} - -#endif - - -/* - * rw spinlock - */ - - -void -rwlock_init(rwlock_t * rwlock) -{ - spin_lock_init(&rwlock->guard); - rwlock->count = 0; -} - -void -rwlock_fini(rwlock_t * rwlock) -{ -} - -void -read_lock(rwlock_t * rwlock) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - ASSERT(slot->Magic == TASKSLT_MAGIC); - - slot->irql = KeRaiseIrqlToDpcLevel(); - - while (TRUE) { - spin_lock(&rwlock->guard); - if (rwlock->count >= 0) - break; - spin_unlock(&rwlock->guard); - } - - rwlock->count++; - spin_unlock(&rwlock->guard); -} - -void -read_unlock(rwlock_t * rwlock) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - ASSERT(slot->Magic == TASKSLT_MAGIC); - - spin_lock(&rwlock->guard); - ASSERT(rwlock->count > 0); - rwlock->count--; - if (rwlock < 0) { - cfs_enter_debugger(); - } - spin_unlock(&rwlock->guard); - - KeLowerIrql(slot->irql); -} - -void -write_lock(rwlock_t * rwlock) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - 
- if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - ASSERT(slot->Magic == TASKSLT_MAGIC); - - slot->irql = KeRaiseIrqlToDpcLevel(); - - while (TRUE) { - spin_lock(&rwlock->guard); - if (rwlock->count == 0) - break; - spin_unlock(&rwlock->guard); - } - - rwlock->count = -1; - spin_unlock(&rwlock->guard); -} - -void -write_unlock(rwlock_t * rwlock) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - ASSERT(slot->Magic == TASKSLT_MAGIC); - - spin_lock(&rwlock->guard); - ASSERT(rwlock->count == -1); - rwlock->count = 0; - spin_unlock(&rwlock->guard); - - KeLowerIrql(slot->irql); -} diff --git a/lnet/libcfs/winnt/winnt-lwt.c b/lnet/libcfs/winnt/winnt-lwt.c deleted file mode 100644 index 272cbcf412da8065e0d35e6a7cdebe87b44c41ab..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-lwt.c +++ /dev/null @@ -1,20 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. 
- */ - -# define DEBUG_SUBSYSTEM S_LNET - diff --git a/lnet/libcfs/winnt/winnt-mem.c b/lnet/libcfs/winnt/winnt-mem.c deleted file mode 100644 index 6b66a95c6bbbce0386aa03a5f8a352f15b604915..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-mem.c +++ /dev/null @@ -1,332 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> - - -cfs_mem_cache_t *cfs_page_t_slab = NULL; -cfs_mem_cache_t *cfs_page_p_slab = NULL; - -/* - * cfs_alloc_page - * To allocate the cfs_page_t and also 1 page of memory - * - * Arguments: - * flags: the allocation options - * - * Return Value: - * pointer to the cfs_page_t strcture in success or - * NULL in failure case - * - * Notes: - * N/A - */ - -cfs_page_t * cfs_alloc_page(int flags) -{ - cfs_page_t *pg; - pg = cfs_mem_cache_alloc(cfs_page_t_slab, 0); - - if (NULL == pg) { - cfs_enter_debugger(); - return NULL; - } - - memset(pg, 0, sizeof(cfs_page_t)); - pg->addr = cfs_mem_cache_alloc(cfs_page_p_slab, 0); - atomic_set(&pg->count, 1); - - if (pg->addr) { - if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) { - memset(pg->addr, 0, CFS_PAGE_SIZE); - } - } else { - cfs_enter_debugger(); - cfs_mem_cache_free(cfs_page_t_slab, pg); - pg = NULL; - } - - return pg; -} - -/* - * cfs_free_page - * To free the cfs_page_t including the page - * - * Arguments: - * pg: pointer to the cfs_page_t strcture - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ -void cfs_free_page(cfs_page_t *pg) -{ - ASSERT(pg != NULL); - ASSERT(pg->addr != NULL); - ASSERT(atomic_read(&pg->count) <= 1); - - cfs_mem_cache_free(cfs_page_p_slab, pg->addr); - cfs_mem_cache_free(cfs_page_t_slab, pg); -} - - -/* - * cfs_alloc - * To allocate memory from system pool - * - * Arguments: - * nr_bytes: length in bytes of the requested buffer - * flags: flags indiction - * - * Return Value: - * NULL: if there's no enough memory space in system - * the address of the allocated memory in success. - * - * Notes: - * This operation can be treated as atomic. 
- */ - -void * -cfs_alloc(size_t nr_bytes, u_int32_t flags) -{ - void *ptr; - - /* Ignore the flags: always allcoate from NonPagedPool */ - - ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs'); - - if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) { - memset(ptr, 0, nr_bytes); - } - - if (!ptr) { - cfs_enter_debugger(); - } - - return ptr; -} - -/* - * cfs_free - * To free the sepcified memory to system pool - * - * Arguments: - * addr: pointer to the buffer to be freed - * - * Return Value: - * N/A - * - * Notes: - * This operation can be treated as atomic. - */ - -void -cfs_free(void *addr) -{ - ExFreePool(addr); -} - -/* - * cfs_alloc_large - * To allocate large block of memory from system pool - * - * Arguments: - * nr_bytes: length in bytes of the requested buffer - * - * Return Value: - * NULL: if there's no enough memory space in system - * the address of the allocated memory in success. - * - * Notes: - * N/A - */ - -void * -cfs_alloc_large(size_t nr_bytes) -{ - return cfs_alloc(nr_bytes, 0); -} - -/* - * cfs_free_large - * To free the sepcified memory to system pool - * - * Arguments: - * addr: pointer to the buffer to be freed - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -cfs_free_large(void *addr) -{ - cfs_free(addr); -} - - -/* - * cfs_mem_cache_create - * To create a SLAB cache - * - * Arguments: - * name: name string of the SLAB cache to be created - * size: size in bytes of SLAB entry buffer - * offset: offset in the page - * flags: SLAB creation flags -* - * Return Value: - * The poitner of cfs_memory_cache structure in success. - * NULL pointer in failure case. - * - * Notes: - * 1, offset won't be used here. - * 2, it could be better to induce a lock to protect the access of the - * SLAB structure on SMP if there's not outside lock protection. - * 3, parameters C/D are removed. 
- */ - -cfs_mem_cache_t * -cfs_mem_cache_create( - const char * name, - size_t size, - size_t offset, - unsigned long flags - ) -{ - cfs_mem_cache_t * kmc = NULL; - - /* The name of the SLAB could not exceed 20 chars */ - - if (name && strlen(name) >= 20) { - goto errorout; - } - - /* Allocate and initialize the SLAB strcture */ - - kmc = cfs_alloc (sizeof(cfs_mem_cache_t), 0); - - if (NULL == kmc) { - goto errorout; - } - - memset(kmc, 0, sizeof(cfs_mem_cache_t)); - - kmc->flags = flags; - - if (name) { - strcpy(&kmc->name[0], name); - } - - /* Initialize the corresponding LookAside list */ - - ExInitializeNPagedLookasideList( - &(kmc->npll), - NULL, - NULL, - 0, - size, - 'pnmk', - 0); - -errorout: - - return kmc; -} - -/* - * cfs_mem_cache_destroy - * To destroy the unused SLAB cache - * - * Arguments: - * kmc: the SLAB cache to be destroied. - * - * Return Value: - * 0: in success case. - * 1: in failure case. - * - * Notes: - * N/A - */ - -int cfs_mem_cache_destroy (cfs_mem_cache_t * kmc) -{ - ASSERT(kmc != NULL); - - ExDeleteNPagedLookasideList(&(kmc->npll)); - - cfs_free(kmc); - - return 0; -} - -/* - * cfs_mem_cache_alloc - * To allocate an object (LookAside entry) from the SLAB - * - * Arguments: - * kmc: the SLAB cache to be allocated from. - * flags: flags for allocation options - * - * Return Value: - * object buffer address: in success case. - * NULL: in failure case. - * - * Notes: - * N/A - */ - -void *cfs_mem_cache_alloc(cfs_mem_cache_t * kmc, int flags) -{ - void *buf = NULL; - - buf = ExAllocateFromNPagedLookasideList(&(kmc->npll)); - - return buf; -} - -/* - * cfs_mem_cache_free - * To free an object (LookAside entry) to the SLAB cache - * - * Arguments: - * kmc: the SLAB cache to be freed to. - * buf: the pointer to the object to be freed. 
- * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_mem_cache_free(cfs_mem_cache_t * kmc, void * buf) -{ - ExFreeToNPagedLookasideList(&(kmc->npll), buf); -} diff --git a/lnet/libcfs/winnt/winnt-module.c b/lnet/libcfs/winnt/winnt-module.c deleted file mode 100644 index 2b6b00888e06a4cbb6cd400610724d8e59854718..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-module.c +++ /dev/null @@ -1,160 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. 
- */ - - -#define DEBUG_SUBSYSTEM S_LIBCFS - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - -#define LIBCFS_MINOR 240 - -int libcfs_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct libcfs_ioctl_hdr *hdr; - struct libcfs_ioctl_data *data; - int err; - ENTRY; - - hdr = (struct libcfs_ioctl_hdr *)buf; - data = (struct libcfs_ioctl_data *)buf; - - err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); - if (err) - RETURN(err); - - if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { - CERROR(("LIBCFS: version mismatch kernel vs application\n")); - RETURN(-EINVAL); - } - - if (hdr->ioc_len + buf >= end) { - CERROR(("LIBCFS: user buffer exceeds kernel buffer\n")); - RETURN(-EINVAL); - } - - if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { - CERROR(("LIBCFS: user buffer too small for ioctl\n")); - RETURN(-EINVAL); - } - - err = copy_from_user(buf, (void *)arg, hdr->ioc_len); - if (err) - RETURN(err); - - if (libcfs_ioctl_is_invalid(data)) { - CERROR(("LIBCFS: ioctl not correctly formatted\n")); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1) - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - - if (data->ioc_inllen2) - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1); - - RETURN(0); -} - -extern struct cfs_psdev_ops libcfs_psdev_ops; - -static int -libcfs_psdev_open(cfs_file_t * file) -{ - struct libcfs_device_userstate **pdu = NULL; - int rc = 0; - - pdu = (struct libcfs_device_userstate **)&file->private_data; - if (libcfs_psdev_ops.p_open != NULL) - rc = libcfs_psdev_ops.p_open(0, (void *)pdu); - else - return (-EPERM); - return rc; -} - -/* called when closing /dev/device */ -static int -libcfs_psdev_release(cfs_file_t * file) -{ - struct libcfss_device_userstate *pdu; - int rc = 0; - - pdu = file->private_data; - if (libcfs_psdev_ops.p_close != NULL) - rc = libcfs_psdev_ops.p_close(0, (void *)pdu); - else - rc = -EPERM; - return rc; -} - -static int -libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg) -{ - struct 
cfs_psdev_file pfile; - int rc = 0; - - if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || - _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || - _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) { - CDEBUG(D_IOCTL, ("invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd))); - return (-EINVAL); - } - - /* Handle platform-dependent IOC requests */ - switch (cmd) { - case IOC_LIBCFS_PANIC: - if (!capable (CAP_SYS_BOOT)) - return (-EPERM); - CERROR(("debugctl-invoked panic")); - KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL); - - return (0); - case IOC_LIBCFS_MEMHOG: - - if (!capable (CAP_SYS_ADMIN)) - return -EPERM; - break; - } - - pfile.off = 0; - pfile.private_data = file->private_data; - if (libcfs_psdev_ops.p_ioctl != NULL) - rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); - else - rc = -EPERM; - return (rc); -} - -static struct file_operations libcfs_fops = { - /* lseek: */ NULL, - /* read: */ NULL, - /* write: */ NULL, - /* ioctl: */ libcfs_ioctl, - /* open: */ libcfs_psdev_open, - /* release:*/ libcfs_psdev_release -}; - -cfs_psdev_t libcfs_dev = { - LIBCFS_MINOR, - "lnet", - &libcfs_fops -}; - diff --git a/lnet/libcfs/winnt/winnt-prim.c b/lnet/libcfs/winnt/winnt-prim.c deleted file mode 100644 index 064b071ecdff6782f3702dd2eb39aeeb11fd45dd..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-prim.c +++ /dev/null @@ -1,650 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. 
Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - - -/* - * Thread routines - */ - -/* - * cfs_thread_proc - * Lustre thread procedure wrapper routine (It's an internal routine) - * - * Arguments: - * context: a structure of cfs_thread_context_t, containing - * all the necessary parameters - * - * Return Value: - * void: N/A - * - * Notes: - * N/A - */ - -void -cfs_thread_proc( - void * context - ) -{ - cfs_thread_context_t * thread_context = - (cfs_thread_context_t *) context; - - /* Execute the specified function ... */ - - if (thread_context->func) { - (thread_context->func)(thread_context->arg); - } - - /* Free the context memory */ - - cfs_free(context); - - /* Terminate this system thread */ - - PsTerminateSystemThread(STATUS_SUCCESS); -} - -/* - * cfs_kernel_thread - * Create a system thread to execute the routine specified - * - * Arguments: - * func: function to be executed in the thread - * arg: argument transferred to func function - * flag: thread creation flags. 
- * - * Return Value: - * int: 0 on success or error codes - * - * Notes: - * N/A - */ - -int cfs_kernel_thread(int (*func)(void *), void *arg, int flag) -{ - cfs_handle_t thread = NULL; - NTSTATUS status; - cfs_thread_context_t * context = NULL; - - /* Allocate the context to be transferred to system thread */ - - context = cfs_alloc(sizeof(cfs_thread_context_t), CFS_ALLOC_ZERO); - - if (!context) { - return -ENOMEM; - } - - context->func = func; - context->arg = arg; - - /* Create system thread with the cfs_thread_proc wrapper */ - - status = PsCreateSystemThread( - &thread, - (ACCESS_MASK)0L, - 0, 0, 0, - cfs_thread_proc, - context); - - if (!NT_SUCCESS(status)) { - - - cfs_free(context); - - /* We need translate the nt status to linux error code */ - - return cfs_error_code(status); - } - - // - // Query the thread id of the newly created thread - // - - ZwClose(thread); - - return 0; -} - - -/* - * Symbols routines - */ - - -static CFS_DECL_RWSEM(cfs_symbol_lock); -CFS_LIST_HEAD(cfs_symbol_list); - -int MPSystem = FALSE; - -/* - * cfs_symbol_get - * To query the specified symbol form the symbol table - * - * Arguments: - * name: the symbol name to be queried - * - * Return Value: - * If the symbol is in the table, return the address of it. - * If not, return NULL. 
- * - * Notes: - * N/A - */ - -void * -cfs_symbol_get(const char *name) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_read(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - sym->ref ++; - break; - } - } - up_read(&cfs_symbol_lock); - - if (sym != NULL) - return sym->value; - - return NULL; -} - -/* - * cfs_symbol_put - * To decrease the reference of the specified symbol - * - * Arguments: - * name: the symbol name to be dereferred - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -cfs_symbol_put(const char *name) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_read(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - LASSERT(sym->ref > 0); - sym->ref--; - break; - } - } - up_read(&cfs_symbol_lock); - - LASSERT(sym != NULL); -} - - -/* - * cfs_symbol_register - * To register the specified symbol infromation - * - * Arguments: - * name: the symbol name to be dereferred - * value: the value that the symbol stands for - * - * Return Value: - * N/A - * - * Notes: - * Zero: Succeed to register - * Non-Zero: Fail to register the symbol - */ - -int -cfs_symbol_register(const char *name, const void *value) -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - struct cfs_symbol *new = NULL; - - new = cfs_alloc(sizeof(struct cfs_symbol), CFS_ALLOC_ZERO); - if (!new) { - return (-ENOMEM); - } - strncpy(new->name, name, CFS_SYMBOL_LEN); - new->value = (void *)value; - new->ref = 0; - CFS_INIT_LIST_HEAD(&new->sym_list); - - down_write(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - up_write(&cfs_symbol_lock); - cfs_free(new); - return 0; // alreay registerred - } - } - 
list_add_tail(&new->sym_list, &cfs_symbol_list); - up_write(&cfs_symbol_lock); - - return 0; -} - -/* - * cfs_symbol_unregister - * To unregister/remove the specified symbol - * - * Arguments: - * name: the symbol name to be dereferred - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -cfs_symbol_unregister(const char *name) -{ - struct list_head *walker; - struct list_head *nxt; - struct cfs_symbol *sym = NULL; - - down_write(&cfs_symbol_lock); - list_for_each_safe(walker, nxt, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - if (!strcmp(sym->name, name)) { - LASSERT(sym->ref == 0); - list_del (&sym->sym_list); - cfs_free(sym); - break; - } - } - up_write(&cfs_symbol_lock); -} - -/* - * cfs_symbol_clean - * To clean all the symbols - * - * Arguments: - * N/A - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -cfs_symbol_clean() -{ - struct list_head *walker; - struct cfs_symbol *sym = NULL; - - down_write(&cfs_symbol_lock); - list_for_each(walker, &cfs_symbol_list) { - sym = list_entry (walker, struct cfs_symbol, sym_list); - LASSERT(sym->ref == 0); - list_del (&sym->sym_list); - cfs_free(sym); - } - up_write(&cfs_symbol_lock); - return; -} - - - -/* - * Timer routines - */ - - -/* Timer dpc procedure */ - -static void -cfs_timer_dpc_proc ( - IN PKDPC Dpc, - IN PVOID DeferredContext, - IN PVOID SystemArgument1, - IN PVOID SystemArgument2) -{ - cfs_timer_t * timer; - KIRQL Irql; - - timer = (cfs_timer_t *) DeferredContext; - - /* clear the flag */ - KeAcquireSpinLock(&(timer->Lock), &Irql); - cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); - KeReleaseSpinLock(&(timer->Lock), Irql); - - /* call the user specified timer procedure */ - timer->proc((unsigned long)(timer->arg)); -} - -/* - * cfs_timer_init - * To initialize the cfs_timer_t - * - * Arguments: - * timer: the cfs_timer to be initialized - * func: the timer callback procedure - * arg: argument for the callback proc - * - * Return Value: - * 
N/A - * - * Notes: - * N/A - */ - -void cfs_timer_init(cfs_timer_t *timer, void (*func)(unsigned long), void *arg) -{ - memset(timer, 0, sizeof(cfs_timer_t)); - - timer->proc = func; - timer->arg = arg; - - KeInitializeSpinLock(&(timer->Lock)); - KeInitializeTimer(&timer->Timer); - KeInitializeDpc (&timer->Dpc, cfs_timer_dpc_proc, timer); - - cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_INITED); -} - -/* - * cfs_timer_done - * To finialize the cfs_timer_t (unused) - * - * Arguments: - * timer: the cfs_timer to be cleaned up - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_timer_done(cfs_timer_t *timer) -{ - return; -} - -/* - * cfs_timer_arm - * To schedule the timer while touching @deadline - * - * Arguments: - * timer: the cfs_timer to be freed - * dealine: timeout value to wake up the timer - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_timer_arm(cfs_timer_t *timer, cfs_time_t deadline) -{ - LARGE_INTEGER timeout; - KIRQL Irql; - - KeAcquireSpinLock(&(timer->Lock), &Irql); - if (!cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)){ - - timeout.QuadPart = (LONGLONG)-1*1000*1000*10/HZ*deadline; - - if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc )) { - cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); - } - - timer->deadline = deadline; - } - - KeReleaseSpinLock(&(timer->Lock), Irql); -} - -/* - * cfs_timer_disarm - * To discard the timer to be scheduled - * - * Arguments: - * timer: the cfs_timer to be discarded - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_timer_disarm(cfs_timer_t *timer) -{ - KIRQL Irql; - - KeAcquireSpinLock(&(timer->Lock), &Irql); - KeCancelTimer(&(timer->Timer)); - cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); - KeReleaseSpinLock(&(timer->Lock), Irql); -} - - -/* - * cfs_timer_is_armed - * To check the timer is scheduled or not - * - * Arguments: - * timer: the cfs_timer to be checked - * - * Return Value: - * 1: if it's armed. - * 0: if it's not. 
- * - * Notes: - * N/A - */ - -int cfs_timer_is_armed(cfs_timer_t *timer) -{ - int rc = 0; - KIRQL Irql; - - KeAcquireSpinLock(&(timer->Lock), &Irql); - if (cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)) { - rc = 1; - } - KeReleaseSpinLock(&(timer->Lock), Irql); - - return rc; -} - -/* - * cfs_timer_deadline - * To query the deadline of the timer - * - * Arguments: - * timer: the cfs_timer to be queried - * - * Return Value: - * the deadline value - * - * Notes: - * N/A - */ - -cfs_time_t cfs_timer_deadline(cfs_timer_t * timer) -{ - return timer->deadline; -} - -/* - * daemonize routine stub - */ - -void cfs_daemonize(char *str) -{ - return; -} - -/* - * routine related with sigals - */ - -cfs_sigset_t cfs_get_blockedsigs() -{ - return 0; -} - -cfs_sigset_t cfs_block_allsigs() -{ - return 0; -} - -cfs_sigset_t cfs_block_sigs(sigset_t bit) -{ - return 0; -} - -void cfs_restore_sigs(cfs_sigset_t old) -{ -} - -int cfs_signal_pending(void) -{ - return 0; -} - -void cfs_clear_sigpending(void) -{ - return; -} - -/** - ** Initialize routines - **/ - -int -libcfs_arch_init(void) -{ - int rc; - - spinlock_t lock; - /* Workground to check the system is MP build or UP build */ - spin_lock_init(&lock); - spin_lock(&lock); - MPSystem = (int)lock.lock; - /* MP build system: it's a real spin, for UP build system, it - only raises the IRQL to DISPATCH_LEVEL */ - spin_unlock(&lock); - - /* create slab memory caches for page alloctors */ - cfs_page_t_slab = cfs_mem_cache_create( - "CPGT", sizeof(cfs_page_t), 0, 0 ); - - cfs_page_p_slab = cfs_mem_cache_create( - "CPGP", CFS_PAGE_SIZE, 0, 0 ); - - if ( cfs_page_t_slab == NULL || - cfs_page_p_slab == NULL ){ - rc = -ENOMEM; - goto errorout; - } - - rc = init_task_manager(); - - if (rc != 0) { - cfs_enter_debugger(); - KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing task manager ...\n")); - goto errorout; - } - - /* initialize the proc file system */ - rc = proc_init_fs(); - - if (rc != 0) { - cfs_enter_debugger(); - 
KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing proc fs ...\n")); - cleanup_task_manager(); - goto errorout; - } - - /* initialize the tdi data */ - rc = ks_init_tdi_data(); - - if (rc != 0) { - cfs_enter_debugger(); - KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing tdi ...\n")); - proc_destroy_fs(); - cleanup_task_manager(); - goto errorout; - } - -errorout: - - if (rc != 0) { - /* destroy the taskslot cache slab */ - if (cfs_page_t_slab) { - cfs_mem_cache_destroy(cfs_page_t_slab); - } - if (cfs_page_p_slab) { - cfs_mem_cache_destroy(cfs_page_p_slab); - } - } - - return rc; -} - -void -libcfs_arch_cleanup(void) -{ - /* finialize the tdi data */ - ks_fini_tdi_data(); - - /* detroy the whole proc fs tree and nodes */ - proc_destroy_fs(); - - /* destroy the taskslot cache slab */ - if (cfs_page_t_slab) { - cfs_mem_cache_destroy(cfs_page_t_slab); - } - - if (cfs_page_p_slab) { - cfs_mem_cache_destroy(cfs_page_p_slab); - } - - return; -} - -EXPORT_SYMBOL(libcfs_arch_init); -EXPORT_SYMBOL(libcfs_arch_cleanup); diff --git a/lnet/libcfs/winnt/winnt-proc.c b/lnet/libcfs/winnt/winnt-proc.c deleted file mode 100644 index ebce30d3707dadbc0e05c2d9dbccf641bd98ff3a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-proc.c +++ /dev/null @@ -1,1990 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif - -# define DEBUG_SUBSYSTEM S_LNET - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -#ifdef __KERNEL__ - - -/* - * /proc emulator routines ... - */ - -/* The root node of the proc fs emulation: /proc */ -cfs_proc_entry_t * proc_fs_root = NULL; - - -/* The sys root: /proc/sys */ -cfs_proc_entry_t * proc_sys_root = NULL; - - -/* The sys root: /proc/dev | to implement misc device */ - -cfs_proc_entry_t * proc_dev_root = NULL; - - -/* SLAB object for cfs_proc_entry_t allocation */ - -cfs_mem_cache_t * proc_entry_cache = NULL; - -/* root node for sysctl table */ - -cfs_sysctl_table_header_t root_table_header; - -/* The global lock to protect all the access */ - -#if LIBCFS_PROCFS_SPINLOCK -spinlock_t proc_fs_lock; - -#define INIT_PROCFS_LOCK() spin_lock_init(&proc_fs_lock) -#define LOCK_PROCFS() spin_lock(&proc_fs_lock) -#define UNLOCK_PROCFS() spin_unlock(&proc_fs_lock) - -#else - -mutex_t proc_fs_lock; - -#define INIT_PROCFS_LOCK() init_mutex(&proc_fs_lock) -#define LOCK_PROCFS() mutex_down(&proc_fs_lock) -#define UNLOCK_PROCFS() mutex_up(&proc_fs_lock) - -#endif - -static ssize_t -proc_file_read(struct file * file, const char * buf, size_t nbytes, loff_t *ppos) -{ - char *page; - ssize_t retval=0; - int eof=0; - ssize_t n, count; - char *start; - cfs_proc_entry_t * dp; - - dp = (cfs_proc_entry_t *) file->private_data; - if (!(page = (char*) cfs_alloc(CFS_PAGE_SIZE, 0))) - return -ENOMEM; - - while ((nbytes > 0) && !eof) { - - count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); - - start = NULL; - if (dp->read_proc) { - n = dp->read_proc( page, &start, (long)*ppos, - count, &eof, dp->data); - } else - break; - - if (!start) { - /* - * For proc files that are less than 4k - */ - start = page 
+ *ppos; - n -= (ssize_t)(*ppos); - if (n <= 0) - break; - if (n > count) - n = count; - } - if (n == 0) - break; /* End of file */ - if (n < 0) { - if (retval == 0) - retval = n; - break; - } - - n -= copy_to_user((void *)buf, start, n); - if (n == 0) { - if (retval == 0) - retval = -EFAULT; - break; - } - - *ppos += n; - nbytes -= n; - buf += n; - retval += n; - } - cfs_free(page); - - return retval; -} - -static ssize_t -proc_file_write(struct file * file, const char * buffer, - size_t count, loff_t *ppos) -{ - cfs_proc_entry_t * dp; - - dp = (cfs_proc_entry_t *) file->private_data; - - if (!dp->write_proc) - return -EIO; - - /* FIXME: does this routine need ppos? probably... */ - return dp->write_proc(file, buffer, count, dp->data); -} - -struct file_operations proc_file_operations = { - /*lseek:*/ NULL, //proc_file_lseek, - /*read:*/ proc_file_read, - /*write:*/ proc_file_write, - /*ioctl:*/ NULL, - /*open:*/ NULL, - /*release:*/ NULL -}; - -/* allocate proc entry block */ - -cfs_proc_entry_t * -proc_alloc_entry() -{ - cfs_proc_entry_t * entry = NULL; - - entry = cfs_mem_cache_alloc(proc_entry_cache, 0); - if (!entry) { - return NULL; - } - - memset(entry, 0, sizeof(cfs_proc_entry_t)); - - entry->magic = CFS_PROC_ENTRY_MAGIC; - RtlInitializeSplayLinks(&(entry->s_link)); - entry->proc_fops = &proc_file_operations; - - return entry; -} - -/* free the proc entry block */ - -void -proc_free_entry(cfs_proc_entry_t * entry) - -{ - ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC); - - cfs_mem_cache_free(proc_entry_cache, entry); -} - -/* dissect the path string for a given full proc path */ - -void -proc_dissect_name( - char *path, - char **first, - int *first_len, - char **remain - ) -{ - int i = 0, j = 0, len = 0; - - *first = *remain = NULL; - *first_len = 0; - - len = strlen(path); - - while (i < len && (path[i] == '/')) i++; - - if (i < len) { - - *first = path + i; - while (i < len && (path[i] != '/')) i++; - *first_len = (path + i - *first); - - if (i + 1 < len) { 
- *remain = path + i + 1; - } - } -} - -/* search the children entries of the parent entry */ - -cfs_proc_entry_t * -proc_search_splay ( - cfs_proc_entry_t * parent, - char * name - ) -{ - cfs_proc_entry_t * node; - PRTL_SPLAY_LINKS link; - - ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); - - link = parent->root; - - while (link) { - - ANSI_STRING ename,nname; - long result; - - node = CONTAINING_RECORD(link, cfs_proc_entry_t, s_link); - - ASSERT(node->magic == CFS_PROC_ENTRY_MAGIC); - - /* Compare the prefix in the tree with the full name */ - - RtlInitAnsiString(&ename, name); - RtlInitAnsiString(&nname, node->name); - - result = RtlCompareString(&nname, &ename,TRUE); - - if (result > 0) { - - /* The prefix is greater than the full name - so we go down the left child */ - - link = RtlLeftChild(link); - - } else if (result < 0) { - - /* The prefix is less than the full name - so we go down the right child */ - // - - link = RtlRightChild(link); - - } else { - - /* We got the entry in the splay tree and - make it root node instead */ - - parent->root = RtlSplay(link); - - return node; - } - - /* we need continue searching down the tree ... 
*/ - } - - /* There's no the exptected entry in the splay tree */ - - return NULL; -} - -int -proc_insert_splay ( - cfs_proc_entry_t * parent, - cfs_proc_entry_t * child - ) -{ - cfs_proc_entry_t * entry; - - ASSERT(parent != NULL && child != NULL); - ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); - - if (!parent->root) { - parent->root = &(child->s_link); - } else { - entry = CONTAINING_RECORD(parent->root, cfs_proc_entry_t, s_link); - while (TRUE) { - long result; - ANSI_STRING ename, cname; - - ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC); - - RtlInitAnsiString(&ename, entry->name); - RtlInitAnsiString(&cname, child->name); - - result = RtlCompareString(&ename, &cname,TRUE); - - if (result == 0) { - cfs_enter_debugger(); - if (entry == child) { - break; - } - return FALSE; - } - - if (result > 0) { - if (RtlLeftChild(&entry->s_link) == NULL) { - RtlInsertAsLeftChild(&entry->s_link, &child->s_link); - break; - } else { - entry = CONTAINING_RECORD( RtlLeftChild(&entry->s_link), - cfs_proc_entry_t, s_link); - } - } else { - if (RtlRightChild(&entry->s_link) == NULL) { - RtlInsertAsRightChild(&entry->s_link, &child->s_link); - break; - } else { - entry = CONTAINING_RECORD( RtlRightChild(&entry->s_link), - cfs_proc_entry_t, s_link ); - } - } - } - } - - cfs_set_flag(child->flags, CFS_PROC_FLAG_ATTACHED); - parent->nlink++; - - return TRUE; -} - - -/* remove a child entry from the splay tree */ -int -proc_remove_splay ( - cfs_proc_entry_t * parent, - cfs_proc_entry_t * child - ) -{ - cfs_proc_entry_t * entry = NULL; - - ASSERT(parent != NULL && child != NULL); - ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC); - ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); - ASSERT(cfs_is_flag_set(child->flags, CFS_PROC_FLAG_ATTACHED)); - - entry = proc_search_splay(parent, child->name); - - if (entry) { - 
ASSERT(entry == child); - parent->root = RtlDelete(&(entry->s_link)); - parent->nlink--; - } else { - cfs_enter_debugger(); - return FALSE; - } - - return TRUE; -} - - -/* search a node inside the proc fs tree */ - -cfs_proc_entry_t * -proc_search_entry( - char * name, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t * entry; - cfs_proc_entry_t * parent; - char *first, *remain; - int flen; - char *ename = NULL; - - parent = root; - entry = NULL; - - ename = cfs_alloc(0x21, CFS_ALLOC_ZERO); - - if (ename == NULL) { - goto errorout; - } - -again: - - /* dissect the file name string */ - proc_dissect_name(name, &first, &flen, &remain); - - if (first) { - - if (flen >= 0x20) { - cfs_enter_debugger(); - entry = NULL; - goto errorout; - } - - memset(ename, 0, 0x20); - memcpy(ename, first, flen); - - entry = proc_search_splay(parent, ename); - - if (!entry) { - goto errorout; - } - - if (remain) { - name = remain; - parent = entry; - - goto again; - } - } - -errorout: - - if (ename) { - cfs_free(ename); - } - - return entry; -} - -/* insert the path nodes to the proc fs tree */ - -cfs_proc_entry_t * -proc_insert_entry( - char * name, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t *entry; - cfs_proc_entry_t *parent; - char *first, *remain; - int flen; - char ename[0x20]; - - parent = root; - entry = NULL; - -again: - - proc_dissect_name(name, &first, &flen, &remain); - - if (first) { - - if (flen >= 0x20) { - return NULL; - } - - memset(ename, 0, 0x20); - memcpy(ename, first, flen); - - entry = proc_search_splay(parent, ename); - - if (!entry) { - entry = proc_alloc_entry(); - memcpy(entry->name, ename, flen); - - if (entry) { - if(!proc_insert_splay(parent, entry)) { - proc_free_entry(entry); - entry = NULL; - } - } - } - - if (!entry) { - return NULL; - } - - if (remain) { - entry->mode |= S_IFDIR | S_IRUGO | S_IXUGO; - cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY); - name = remain; - parent = entry; - goto again; - } - } - - return entry; -} - -/* remove 
the path nodes from the proc fs tree */ - -void -proc_remove_entry( - char * name, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t *entry; - char *first, *remain; - int flen; - char ename[0x20]; - - entry = NULL; - - proc_dissect_name(name, &first, &flen, &remain); - - if (first) { - - memset(ename, 0, 0x20); - memcpy(ename, first, flen); - - entry = proc_search_splay(root, ename); - - if (entry) { - - if (remain) { - ASSERT(S_ISDIR(entry->mode)); - proc_remove_entry(remain, entry); - } - - if (!entry->nlink) { - proc_remove_splay(root, entry); - proc_free_entry(entry); - } - } - } else { - cfs_enter_debugger(); - } -} - -/* create proc entry and insert it into the proc fs */ - -cfs_proc_entry_t * -create_proc_entry ( - char * name, - mode_t mode, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t *parent = root; - cfs_proc_entry_t *entry = NULL; - - if (S_ISDIR(mode)) { - if ((mode & S_IALLUGO) == 0) - mode |= S_IRUGO | S_IXUGO; - } else { - if ((mode & S_IFMT) == 0) - mode |= S_IFREG; - if ((mode & S_IALLUGO) == 0) - mode |= S_IRUGO; - } - - LOCK_PROCFS(); - - ASSERT(NULL != proc_fs_root); - - if (!parent) { - parent = proc_fs_root; - } - - entry = proc_search_entry(name, parent); - - if (!entry) { - entry = proc_insert_entry(name, parent); - if (!entry) { - /* Failed to create/insert the splay node ... */ - cfs_enter_debugger(); - goto errorout; - } - /* Initializing entry ... 
*/ - entry->mode = mode; - - if (S_ISDIR(mode)) { - cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY); - } - } - -errorout: - - UNLOCK_PROCFS(); - - return entry; -} - - -/* search the specified entry form the proc fs */ - -cfs_proc_entry_t * -search_proc_entry( - char * name, - cfs_proc_entry_t * root - ) -{ - cfs_proc_entry_t * entry; - - LOCK_PROCFS(); - if (root == NULL) { - root = proc_fs_root; - } - entry = proc_search_entry(name, root); - UNLOCK_PROCFS(); - - return entry; -} - -/* remove the entry from the proc fs */ - -void -remove_proc_entry( - char * name, - cfs_proc_entry_t * parent - ) -{ - LOCK_PROCFS(); - if (parent == NULL) { - parent = proc_fs_root; - } - proc_remove_entry(name, parent); - UNLOCK_PROCFS(); -} - - -void proc_destroy_splay(cfs_proc_entry_t * entry) -{ - cfs_proc_entry_t * node; - - if (S_ISDIR(entry->mode)) { - - while (entry->root) { - node = CONTAINING_RECORD(entry->root, cfs_proc_entry_t, s_link); - entry->root = RtlDelete(&(node->s_link)); - proc_destroy_splay(node); - } - } - - proc_free_entry(entry); -} - - -/* destory the whole proc fs tree */ - -void proc_destroy_fs() -{ - LOCK_PROCFS(); - - if (proc_fs_root) { - proc_destroy_splay(proc_fs_root); - } - - if (proc_entry_cache) { - cfs_mem_cache_destroy(proc_entry_cache); - } - - UNLOCK_PROCFS(); -} - -/* initilaize / build the proc fs tree */ - -int proc_init_fs() -{ - cfs_proc_entry_t * root = NULL; - - memset(&(root_table_header), 0, sizeof(struct ctl_table_header)); - INIT_LIST_HEAD(&(root_table_header.ctl_entry)); - - INIT_PROCFS_LOCK(); - proc_entry_cache = cfs_mem_cache_create( - NULL, - sizeof(cfs_proc_entry_t), - 0, - 0 - ); - - if (!proc_entry_cache) { - return (-ENOMEM); - } - - root = proc_alloc_entry(); - - if (!root) { - proc_destroy_fs(); - return (-ENOMEM); - } - - root->magic = CFS_PROC_ENTRY_MAGIC; - root->flags = CFS_PROC_FLAG_DIRECTORY; - root->mode = S_IFDIR | S_IRUGO | S_IXUGO; - root->nlink = 3; // root should never be deleted. 
- - root->name[0]='p'; - root->name[1]='r'; - root->name[2]='o'; - root->name[3]='c'; - - proc_fs_root = root; - - proc_sys_root = create_proc_entry("sys", S_IFDIR, root); - - if (!proc_sys_root) { - proc_free_entry(root); - proc_fs_root = NULL; - proc_destroy_fs(); - return (-ENOMEM); - } - - proc_sys_root->nlink = 1; - - proc_dev_root = create_proc_entry("dev", S_IFDIR, root); - - if (!proc_dev_root) { - proc_free_entry(proc_sys_root); - proc_sys_root = NULL; - proc_free_entry(proc_fs_root); - proc_fs_root = NULL; - proc_destroy_fs(); - return (-ENOMEM); - } - - proc_dev_root->nlink = 1; - - return 0; -} - - -static ssize_t do_rw_proc(int write, struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - int op; - cfs_proc_entry_t *de; - struct ctl_table *table; - size_t res; - ssize_t error; - - de = (cfs_proc_entry_t *) file->proc_dentry; - - if (!de || !de->data) - return -ENOTDIR; - table = (struct ctl_table *) de->data; - if (!table || !table->proc_handler) - return -ENOTDIR; - op = (write ? 002 : 004); - -// if (ctl_perm(table, op)) -// return -EPERM; - - res = count; - - /* - * FIXME: we need to pass on ppos to the handler. 
- */ - - error = (*table->proc_handler) (table, write, file, buf, &res); - if (error) - return error; - return res; -} - -static ssize_t proc_readsys(struct file * file, char * buf, - size_t count, loff_t *ppos) -{ - return do_rw_proc(0, file, buf, count, ppos); -} - -static ssize_t proc_writesys(struct file * file, const char * buf, - size_t count, loff_t *ppos) -{ - return do_rw_proc(1, file, (char *) buf, count, ppos); -} - - -struct file_operations proc_sys_file_operations = { - /*lseek:*/ NULL, - /*read:*/ proc_readsys, - /*write:*/ proc_writesys, - /*ioctl:*/ NULL, - /*open:*/ NULL, - /*release:*/ NULL -}; - - -/* Scan the sysctl entries in table and add them all into /proc */ -void register_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t * root) -{ - cfs_proc_entry_t * de; - int len; - mode_t mode; - - for (; table->ctl_name; table++) { - /* Can't do anything without a proc name. */ - if (!table->procname) - continue; - /* Maybe we can't do anything with it... */ - if (!table->proc_handler && !table->child) { - printk(KERN_WARNING "SYSCTL: Can't register %s\n", - table->procname); - continue; - } - - len = strlen(table->procname); - mode = table->mode; - - de = NULL; - if (table->proc_handler) - mode |= S_IFREG; - else { - de = search_proc_entry(table->procname, root); - if (de) { - break; - } - /* If the subdir exists already, de is non-NULL */ - } - - if (!de) { - - de = create_proc_entry((char *)table->procname, mode, root); - if (!de) - continue; - de->data = (void *) table; - if (table->proc_handler) { - de->proc_fops = &proc_sys_file_operations; - } - } - table->de = de; - if (de->mode & S_IFDIR) - register_proc_table(table->child, de); - } -} - - -/* - * Unregister a /proc sysctl table and any subdirectories. 
- */ -void unregister_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t *root) -{ - cfs_proc_entry_t *de; - for (; table->ctl_name; table++) { - if (!(de = table->de)) - continue; - if (de->mode & S_IFDIR) { - if (!table->child) { - printk (KERN_ALERT "Help - malformed sysctl tree on free\n"); - continue; - } - unregister_proc_table(table->child, de); - - /* Don't unregister directories which still have entries.. */ - if (de->nlink) - continue; - } - - /* Don't unregister proc entries that are still being used.. */ - if (de->nlink) - continue; - - table->de = NULL; - remove_proc_entry((char *)table->procname, root); - } -} - -/* The generic string strategy routine: */ -int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context) -{ - int l, len; - - if (!table->data || !table->maxlen) - return -ENOTDIR; - - if (oldval && oldlenp) { - if(get_user(len, oldlenp)) - return -EFAULT; - if (len) { - l = strlen(table->data); - if (len > l) len = l; - if (len >= table->maxlen) - len = table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(0, ((char *) oldval) + len)) - return -EFAULT; - if(put_user(len, oldlenp)) - return -EFAULT; - } - } - if (newval && newlen) { - len = newlen; - if (len > table->maxlen) - len = table->maxlen; - if(copy_from_user(table->data, newval, len)) - return -EFAULT; - if (len == table->maxlen) - len--; - ((char *) table->data)[len] = 0; - } - return 0; -} - -/** - * simple_strtoul - convert a string to an unsigned long - * @cp: The start of the string - * @endp: A pointer to the end of the parsed string will be placed here - * @base: The number base to use - */ -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) -{ - unsigned long result = 0, value; - - if (!base) { - base = 10; - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x') && isxdigit(cp[1])) { - cp++; - base = 16; - } - } - } - while 
(isxdigit(*cp) && - (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { - result = result*base + value; - cp++; - } - if (endp) - *endp = (char *)cp; - return result; -} - -#define OP_SET 0 -#define OP_AND 1 -#define OP_OR 2 -#define OP_MAX 3 -#define OP_MIN 4 - - -static int do_proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp, int conv, int op) -{ - int *i, vleft, first=1, neg, val; - size_t left, len; - - #define TMPBUFLEN 20 - char buf[TMPBUFLEN], *p; - - if (!table->data || !table->maxlen || !*lenp) - { - *lenp = 0; - return 0; - } - - i = (int *) table->data; - vleft = table->maxlen / sizeof(int); - left = *lenp; - - for (; left && vleft--; i++, first=0) { - if (write) { - while (left) { - char c; - if(get_user(c,(char *) buffer)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - ((char *) buffer)++; - } - if (!left) - break; - neg = 0; - len = left; - if (len > TMPBUFLEN-1) - len = TMPBUFLEN-1; - if(copy_from_user(buf, buffer, len)) - return -EFAULT; - buf[len] = 0; - p = buf; - if (*p == '-' && left > 1) { - neg = 1; - left--, p++; - } - if (*p < '0' || *p > '9') - break; - val = simple_strtoul(p, &p, 0) * conv; - len = p-buf; - if ((len < left) && *p && !isspace(*p)) - break; - if (neg) - val = -val; - (char *)buffer += len; - left -= len; - switch(op) { - case OP_SET: *i = val; break; - case OP_AND: *i &= val; break; - case OP_OR: *i |= val; break; - case OP_MAX: if(*i < val) - *i = val; - break; - case OP_MIN: if(*i > val) - *i = val; - break; - } - } else { - p = buf; - if (!first) - *p++ = '\t'; - sprintf(p, "%d", (*i) / conv); - len = strlen(buf); - if (len > left) - len = left; - if(copy_to_user(buffer, buf, len)) - return -EFAULT; - left -= len; - (char *)buffer += len; - } - } - - if (!write && !first && left) { - if(put_user('\n', (char *) buffer)) - return -EFAULT; - left--, ((char *)buffer)++; - } - if (write) { - p = (char *) buffer; - while (left) { - char c; - 
if(get_user(c, p++)) - return -EFAULT; - if (!isspace(c)) - break; - left--; - } - } - if (write && first) - return -EINVAL; - *lenp -= left; - memset(&(filp->f_pos) , 0, sizeof(loff_t)); - filp->f_pos += (loff_t)(*lenp); - return 0; -} - -/** - * proc_dointvec - read a vector of integers - * @table: the sysctl table - * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure - * @buffer: the user buffer - * @lenp: the size of the user buffer - * - * Reads/writes up to table->maxlen/sizeof(unsigned int) integer - * values from/to the user buffer, treated as an ASCII string. - * - * Returns 0 on success. - */ -int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp) -{ - return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET); -} - - -/** - * proc_dostring - read a string sysctl - * @table: the sysctl table - * @write: %TRUE if this is a write to the sysctl file - * @filp: the file structure - * @buffer: the user buffer - * @lenp: the size of the user buffer - * - * Reads/writes a string from/to the user buffer. If the kernel - * buffer provided is not large enough to hold the string, the - * string is truncated. The copied string is %NULL-terminated. - * If the string is being read by the user process, it is copied - * and a newline '\n' is added. It is truncated if the buffer is - * not large enough. - * - * Returns 0 on success. 
- */ -int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp, - void *buffer, size_t *lenp) -{ - size_t len; - char *p, c; - - if (!table->data || !table->maxlen || !*lenp || - (filp->f_pos && !write)) { - *lenp = 0; - return 0; - } - - if (write) { - len = 0; - p = buffer; - while (len < *lenp) { - if(get_user(c, p++)) - return -EFAULT; - if (c == 0 || c == '\n') - break; - len++; - } - if (len >= (size_t)table->maxlen) - len = (size_t)table->maxlen-1; - if(copy_from_user(table->data, buffer, len)) - return -EFAULT; - ((char *) table->data)[len] = 0; - filp->f_pos += *lenp; - } else { - len = (size_t)strlen(table->data); - if (len > (size_t)table->maxlen) - len = (size_t)table->maxlen; - if (len > *lenp) - len = *lenp; - if (len) - if(copy_to_user(buffer, table->data, len)) - return -EFAULT; - if (len < *lenp) { - if(put_user('\n', ((char *) buffer) + len)) - return -EFAULT; - len++; - } - *lenp = len; - filp->f_pos += len; - } - return 0; -} - -/* Perform the actual read/write of a sysctl table entry. 
*/ -int do_sysctl_strategy (cfs_sysctl_table_t *table, - int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, void **context) -{ - int op = 0, rc; - size_t len; - - if (oldval) - op |= 004; - if (newval) - op |= 002; - - if (table->strategy) { - rc = table->strategy(table, name, nlen, oldval, oldlenp, - newval, newlen, context); - if (rc < 0) - return rc; - if (rc > 0) - return 0; - } - - /* If there is no strategy routine, or if the strategy returns - * zero, proceed with automatic r/w */ - if (table->data && table->maxlen) { - if (oldval && oldlenp) { - get_user(len, oldlenp); - if (len) { - if (len > (size_t)table->maxlen) - len = (size_t)table->maxlen; - if(copy_to_user(oldval, table->data, len)) - return -EFAULT; - if(put_user(len, oldlenp)) - return -EFAULT; - } - } - if (newval && newlen) { - len = newlen; - if (len > (size_t)table->maxlen) - len = (size_t)table->maxlen; - if(copy_from_user(table->data, newval, len)) - return -EFAULT; - } - } - return 0; -} - -static int parse_table(int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, - cfs_sysctl_table_t *table, void **context) -{ - int n; - -repeat: - - if (!nlen) - return -ENOTDIR; - if (get_user(n, name)) - return -EFAULT; - for ( ; table->ctl_name; table++) { - if (n == table->ctl_name || table->ctl_name == CTL_ANY) { - int error; - if (table->child) { -/* - if (ctl_perm(table, 001)) - return -EPERM; -*/ - if (table->strategy) { - error = table->strategy( - table, name, nlen, - oldval, oldlenp, - newval, newlen, context); - if (error) - return error; - } - name++; - nlen--; - table = table->child; - goto repeat; - } - error = do_sysctl_strategy(table, name, nlen, - oldval, oldlenp, - newval, newlen, context); - return error; - } - } - return -ENOTDIR; -} - -int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp, - void *newval, size_t newlen) -{ - struct list_head *tmp; - - if (nlen <= 0 || nlen >= CTL_MAXNAME) - return -ENOTDIR; 
- if (oldval) { - int old_len; - if (!oldlenp || get_user(old_len, oldlenp)) - return -EFAULT; - } - tmp = &root_table_header.ctl_entry; - do { - struct ctl_table_header *head = - list_entry(tmp, struct ctl_table_header, ctl_entry); - void *context = NULL; - int error = parse_table(name, nlen, oldval, oldlenp, - newval, newlen, head->ctl_table, - &context); - if (context) - cfs_free(context); - if (error != -ENOTDIR) - return error; - tmp = tmp->next; - } while (tmp != &root_table_header.ctl_entry); - return -ENOTDIR; -} - -/** - * register_sysctl_table - register a sysctl heirarchy - * @table: the top-level table structure - * @insert_at_head: whether the entry should be inserted in front or at the end - * - * Register a sysctl table heirarchy. @table should be a filled in ctl_table - * array. An entry with a ctl_name of 0 terminates the table. - * - * The members of the &ctl_table structure are used as follows: - * - * ctl_name - This is the numeric sysctl value used by sysctl(2). The number - * must be unique within that level of sysctl - * - * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not - * enter a sysctl file - * - * data - a pointer to data for use by proc_handler - * - * maxlen - the maximum size in bytes of the data - * - * mode - the file permissions for the /proc/sys file, and for sysctl(2) - * - * child - a pointer to the child sysctl table if this entry is a directory, or - * %NULL. - * - * proc_handler - the text handler routine (described below) - * - * strategy - the strategy routine (described below) - * - * de - for internal use by the sysctl routines - * - * extra1, extra2 - extra pointers usable by the proc handler routines - * - * Leaf nodes in the sysctl tree will be represented by a single file - * under /proc; non-leaf nodes will be represented by directories. - * - * sysctl(2) can automatically manage read and write requests through - * the sysctl table. 
The data and maxlen fields of the ctl_table - * struct enable minimal validation of the values being written to be - * performed, and the mode field allows minimal authentication. - * - * More sophisticated management can be enabled by the provision of a - * strategy routine with the table entry. This will be called before - * any automatic read or write of the data is performed. - * - * The strategy routine may return - * - * < 0 - Error occurred (error is passed to user process) - * - * 0 - OK - proceed with automatic read or write. - * - * > 0 - OK - read or write has been done by the strategy routine, so - * return immediately. - * - * There must be a proc_handler routine for any terminal nodes - * mirrored under /proc/sys (non-terminals are handled by a built-in - * directory handler). Several default handlers are available to - * cover common cases - - * - * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), - * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(), - * proc_doulongvec_minmax() - * - * It is the handler's job to read the input buffer from user memory - * and process it. The handler should return 0 on success. - * - * This routine returns %NULL on a failure to register, and a pointer - * to the table header on success. - */ -struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table, - int insert_at_head) -{ - struct ctl_table_header *tmp; - tmp = cfs_alloc(sizeof(struct ctl_table_header), 0); - if (!tmp) - return NULL; - tmp->ctl_table = table; - - INIT_LIST_HEAD(&tmp->ctl_entry); - if (insert_at_head) - list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); - else - list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); -#ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root); -#endif - return tmp; -} - -/** - * unregister_sysctl_table - unregister a sysctl table heirarchy - * @header: the header returned from register_sysctl_table - * - * Unregisters the sysctl table and all children. 
proc entries may not - * actually be removed until they are no longer used by anyone. - */ -void unregister_sysctl_table(struct ctl_table_header * header) -{ - list_del(&header->ctl_entry); -#ifdef CONFIG_PROC_FS - unregister_proc_table(header->ctl_table, proc_sys_root); -#endif - cfs_free(header); -} - - -int cfs_psdev_register(cfs_psdev_t * psdev) -{ - cfs_proc_entry_t * entry; - - entry = create_proc_entry ( - (char *)psdev->name, - S_IFREG, - proc_dev_root - ); - - if (!entry) { - return -ENOMEM; - } - - entry->flags |= CFS_PROC_FLAG_MISCDEV; - - entry->proc_fops = psdev->fops; - entry->data = (void *)psdev; - - return 0; -} - -int cfs_psdev_deregister(cfs_psdev_t * psdev) -{ - cfs_proc_entry_t * entry; - - entry = search_proc_entry ( - (char *)psdev->name, - proc_dev_root - ); - - if (entry) { - - ASSERT(entry->data == (void *)psdev); - ASSERT(entry->flags & CFS_PROC_FLAG_MISCDEV); - - remove_proc_entry( - (char *)psdev->name, - proc_dev_root - ); - } - - return 0; -} - -extern char debug_file_path[1024]; - -#define PSDEV_LNET (0x100) -enum { - PSDEV_DEBUG = 1, /* control debugging */ - PSDEV_SUBSYSTEM_DEBUG, /* control debugging */ - PSDEV_PRINTK, /* force all messages to console */ - PSDEV_CONSOLE_RATELIMIT, /* rate limit console messages */ - PSDEV_DEBUG_PATH, /* crashdump log location */ - PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ - PSDEV_LIBCFS_MEMUSED, /* bytes currently PORTAL_ALLOCated */ -}; - -static struct ctl_table lnet_table[] = { - {PSDEV_DEBUG, "debug", &libcfs_debug, sizeof(int), 0644, NULL, - &proc_dointvec}, - {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &libcfs_subsystem_debug, - sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL, - &proc_dointvec}, - {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit, - sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_DEBUG_PATH, "debug_path", debug_file_path, - sizeof(debug_file_path), 0644, NULL, &proc_dostring, 
&sysctl_string}, -/* - {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall, - sizeof(portals_upcall), 0644, NULL, &proc_dostring, - &sysctl_string}, -*/ - {PSDEV_LIBCFS_MEMUSED, "memused", (int *)&libcfs_kmemory.counter, - sizeof(int), 0644, NULL, &proc_dointvec}, - {0} -}; - -static struct ctl_table top_table[2] = { - {PSDEV_LNET, "lnet", NULL, 0, 0555, lnet_table}, - {0} -}; - -int insert_proc(void) -{ - cfs_proc_entry_t *ent; - - ent = create_proc_entry("sys/lnet/dump_kernel", 0, NULL); - if (ent == NULL) { - CERROR(("couldn't register dump_kernel\n")); - return -1; - } - ent->write_proc = trace_dk; - - ent = create_proc_entry("sys/lnet/daemon_file", 0, NULL); - if (ent == NULL) { - CERROR(("couldn't register daemon_file\n")); - return -1; - } - ent->write_proc = trace_write_daemon_file; - ent->read_proc = trace_read_daemon_file; - - ent = create_proc_entry("sys/lnet/debug_mb", 0, NULL); - if (ent == NULL) { - CERROR(("couldn't register debug_mb\n")); - return -1; - } - ent->write_proc = trace_write_debug_mb; - ent->read_proc = trace_read_debug_mb; - - return 0; -} - -void remove_proc(void) -{ - remove_proc_entry("sys/portals/dump_kernel", NULL); - remove_proc_entry("sys/portals/daemon_file", NULL); - remove_proc_entry("sys/portals/debug_mb", NULL); - -#ifdef CONFIG_SYSCTL - if (portals_table_header) - unregister_sysctl_table(portals_table_header); - portals_table_header = NULL; -#endif -} - - -/* - * proc process routines of kernel space - */ - -cfs_file_t * -lustre_open_file(char * filename) -{ - int rc = 0; - cfs_file_t * fh = NULL; - cfs_proc_entry_t * fp = NULL; - - fp = search_proc_entry(filename, proc_fs_root); - - if (!fp) { - rc = -ENOENT; - return NULL; - } - - fh = cfs_alloc(sizeof(cfs_file_t), CFS_ALLOC_ZERO); - - if (!fh) { - rc = -ENOMEM; - return NULL; - } - - fh->private_data = (void *)fp; - fh->f_op = fp->proc_fops; - - if (fh->f_op->open) { - rc = (fh->f_op->open)(fh); - } else { - fp->nlink++; - } - - if (0 != rc) { - cfs_free(fh); - return NULL; 
- } - - return fh; -} - -int -lustre_close_file(cfs_file_t * fh) -{ - int rc = 0; - cfs_proc_entry_t * fp = NULL; - - fp = (cfs_proc_entry_t *) fh->private_data; - - if (fh->f_op->release) { - rc = (fh->f_op->release)(fh); - } else { - fp->nlink--; - } - - cfs_free(fh); - - return rc; -} - -int -lustre_do_ioctl( cfs_file_t * fh, - unsigned long cmd, - ulong_ptr arg ) -{ - int rc = 0; - - if (fh->f_op->ioctl) { - rc = (fh->f_op->ioctl)(fh, cmd, arg); - } - - if (rc != 0) { - printk("lustre_do_ioctl: fialed: cmd = %xh arg = %xh rc = %d\n", - cmd, arg, rc); - } - - return rc; -} - -int -lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl) -{ - int rc = 0; - ulong_ptr data; - - data = (ulong_ptr)devctl + sizeof(CFS_PROC_IOCTL); - - /* obd ioctl code */ - if (_IOC_TYPE(devctl->cmd) == 'f') { -#if 0 - struct obd_ioctl_data * obd = (struct obd_ioctl_data *) data; - - if ( devctl->cmd != (ULONG)OBD_IOC_BRW_WRITE && - devctl->cmd != (ULONG)OBD_IOC_BRW_READ ) { - - unsigned long off = obd->ioc_len; - - if (obd->ioc_pbuf1) { - obd->ioc_pbuf1 = (char *)(data + off); - off += size_round(obd->ioc_plen1); - } - - if (obd->ioc_pbuf2) { - obd->ioc_pbuf2 = (char *)(data + off); - } - } - #endif - } - - rc = lustre_do_ioctl(fh, devctl->cmd, data); - - return rc; -} - - -size_t -lustre_read_file( - cfs_file_t * fh, - loff_t off, - size_t size, - char * buf - ) -{ - size_t rc = 0; - - if (fh->f_op->read) { - rc = (fh->f_op->read) (fh, buf, size, &off); - } - - return rc; -} - - -size_t -lustre_write_file( - cfs_file_t * fh, - loff_t off, - size_t size, - char * buf - ) -{ - size_t rc = 0; - - if (fh->f_op->write) { - rc = (fh->f_op->write)(fh, buf, size, &off); - } - - return rc; -} - -#else /* !__KERNEL__ */ - -#include <lnet/api-support.h> -#include <liblustre.h> -#include <lustre_lib.h> - -/* - * proc process routines of user space - */ - -HANDLE cfs_proc_open (char * filename, int oflag) -{ - NTSTATUS status; - IO_STATUS_BLOCK iosb; - int rc; - - HANDLE FileHandle = 
INVALID_HANDLE_VALUE; - OBJECT_ATTRIBUTES ObjectAttributes; - ACCESS_MASK DesiredAccess; - ULONG CreateDisposition; - ULONG ShareAccess; - ULONG CreateOptions; - UNICODE_STRING UnicodeName; - USHORT NameLength; - - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; - - /* Check the filename: should start with "/proc" or "/dev" */ - NameLength = (USHORT)strlen(filename); - if (NameLength > 0x05) { - if (_strnicmp(filename, "/proc/", 6) == 0) { - filename += 6; - NameLength -=6; - if (NameLength <= 0) { - rc = -EINVAL; - goto errorout; - } - } else if (_strnicmp(filename, "/dev/", 5) == 0) { - } else { - rc = -EINVAL; - goto errorout; - } - } else { - rc = -EINVAL; - goto errorout; - } - - /* Analyze the flags settings */ - - if (cfs_is_flag_set(oflag, O_WRONLY)) { - DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = 0; - } else if (cfs_is_flag_set(oflag, O_RDWR)) { - DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE; - } else { - DesiredAccess = (GENERIC_READ | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ; - } - - if (cfs_is_flag_set(oflag, O_CREAT)) { - if (cfs_is_flag_set(oflag, O_EXCL)) { - CreateDisposition = FILE_CREATE; - rc = -EINVAL; - goto errorout; - } else { - CreateDisposition = FILE_OPEN_IF; - } - } else { - CreateDisposition = FILE_OPEN; - } - - if (cfs_is_flag_set(oflag, O_TRUNC)) { - if (cfs_is_flag_set(oflag, O_EXCL)) { - CreateDisposition = FILE_OVERWRITE; - } else { - CreateDisposition = FILE_OVERWRITE_IF; - } - } - - CreateOptions = 0; - - if (cfs_is_flag_set(oflag, O_DIRECTORY)) { - cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE); - } - - if (cfs_is_flag_set(oflag, O_SYNC)) { - cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH); - } - - if (cfs_is_flag_set(oflag, O_DIRECT)) { - cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING); - } - - /* Initialize the unicode path name for the specified file */ - 
RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK); - - /* Setup the object attributes structure for the file. */ - InitializeObjectAttributes( - &ObjectAttributes, - &UnicodeName, - OBJ_CASE_INSENSITIVE, - NULL, - NULL ); - - /* building EA for the proc entry ... */ - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = (UCHAR)NameLength; - Ea->EaValueLength = 0; - RtlCopyMemory( - &(Ea->EaName), - filename, - NameLength + 1 - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + - Ea->EaNameLength + 1; - - /* Now to open or create the file now */ - status = ZwCreateFile( - &FileHandle, - DesiredAccess, - &ObjectAttributes, - &iosb, - 0, - FILE_ATTRIBUTE_NORMAL, - ShareAccess, - CreateDisposition, - CreateOptions, - Ea, - EaLength ); - - /* Check the returned status of Iosb ... */ - - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - goto errorout; - } - -errorout: - - return FileHandle; -} - -int cfs_proc_close(HANDLE handle) -{ - if (handle) { - NtClose((HANDLE)handle); - } - - return 0; -} - -int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count) -{ - NTSTATUS status; - IO_STATUS_BLOCK iosb; - LARGE_INTEGER offset; - - - offset.QuadPart = 0; - - /* read file data */ - status = NtReadFile( - (HANDLE)handle, - 0, - NULL, - NULL, - &iosb, - buffer, - count, - &offset, - NULL); - - /* check the return status */ - if (!NT_SUCCESS(status)) { - printf("NtReadFile request failed 0x%0x\n", status); - goto errorout; - } - -errorout: - - if (NT_SUCCESS(status)) { - return iosb.Information; - } - - return cfs_error_code(status); -} - - -int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count) -{ - NTSTATUS status; - IO_STATUS_BLOCK iosb; - LARGE_INTEGER offset; - - offset.QuadPart = -1; - - /* write buffer to the opened file */ - status = NtWriteFile( - (HANDLE)handle, - 0, - NULL, - NULL, - &iosb, - buffer, - count, - &offset, - NULL); - - /* check the return status */ - if 
(!NT_SUCCESS(status)) { - printf("NtWriteFile request failed 0x%0x\n", status); - goto errorout; - } - -errorout: - - if (NT_SUCCESS(status)) { - return iosb.Information; - } - - return cfs_error_code(status); -} - -int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer) -{ - PUCHAR procdat = NULL; - CFS_PROC_IOCTL procctl; - ULONG length = 0; - ULONG extra = 0; - - NTSTATUS status; - IO_STATUS_BLOCK iosb; - - procctl.cmd = cmd; - - if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) { - struct libcfs_ioctl_data * portal; - portal = (struct libcfs_ioctl_data *) buffer; - length = portal->ioc_len; - } else if (_IOC_TYPE(cmd) == 'f') { - struct obd_ioctl_data * obd; - obd = (struct obd_ioctl_data *) buffer; - length = obd->ioc_len; - extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2); - } else if(_IOC_TYPE(cmd) == 'u') { - length = 4; - extra = 0; - } else { - printf("user:winnt-proc:cfs_proc_ioctl: un-supported ioctl type ...\n"); - cfs_enter_debugger(); - status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - procctl.len = length + extra; - procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL)); - - if (NULL == procdat) { - printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n"); - status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); - goto errorout; - } - memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL)); - memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL)); - memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length); - length += sizeof(CFS_PROC_IOCTL); - - if (_IOC_TYPE(cmd) == 'f') { - - char *ptr; - struct obd_ioctl_data * data; - struct obd_ioctl_data * obd; - - data = (struct obd_ioctl_data *) buffer; - obd = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL)); - ptr = obd->ioc_bulk; - - if (data->ioc_inlbuf1) { - obd->ioc_inlbuf1 = ptr; - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - } - - if (data->ioc_inlbuf2) { - obd->ioc_inlbuf2 = ptr; - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - } - if 
(data->ioc_inlbuf3) { - obd->ioc_inlbuf3 = ptr; - LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - } - if (data->ioc_inlbuf4) { - obd->ioc_inlbuf4 = ptr; - LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - } - - if ( cmd != (ULONG)OBD_IOC_BRW_WRITE && - cmd != (ULONG)OBD_IOC_BRW_READ ) { - - if (data->ioc_pbuf1 && data->ioc_plen1) { - obd->ioc_pbuf1 = &procdat[length]; - memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, data->ioc_plen1); - length += size_round(data->ioc_plen1); - } - - if (data->ioc_pbuf2 && data->ioc_plen2) { - obd->ioc_pbuf2 = &procdat[length]; - memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, data->ioc_plen2); - length += size_round(data->ioc_plen2); - } - } - - if (obd_ioctl_is_invalid(obd)) { - cfs_enter_debugger(); - } - } - - status = NtDeviceIoControlFile( - (HANDLE)handle, - NULL, NULL, NULL, &iosb, - IOCTL_LIBCFS_ENTRY, - procdat, length, - procdat, length ); - - - if (NT_SUCCESS(status)) { - memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len); - } - -errorout: - - if (procdat) { - free(procdat); - } - - return cfs_error_code(status); -} - -#endif /* __KERNEL__ */ diff --git a/lnet/libcfs/winnt/winnt-sync.c b/lnet/libcfs/winnt/winnt-sync.c deleted file mode 100644 index 5094befbf148c4670befaeab62adb26eac52680a..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-sync.c +++ /dev/null @@ -1,449 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -#define DEBUG_SUBSYSTEM S_LIBCFS - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> - - -/* - * Wait queue routines - */ - -/* - * cfs_waitq_init - * To initialize the wait queue - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_init(cfs_waitq_t *waitq) -{ - waitq->magic = CFS_WAITQ_MAGIC; - waitq->flags = 0; - INIT_LIST_HEAD(&(waitq->waiters)); - spin_lock_init(&(waitq->guard)); -} - -/* - * cfs_waitlink_init - * To initialize the wake link node - * - * Arguments: - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitlink_init(cfs_waitlink_t *link) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, task); - cfs_assert(slot->Magic == TASKSLT_MAGIC); - - memset(link, 0, sizeof(cfs_waitlink_t)); - - link->magic = CFS_WAITLINK_MAGIC; - link->flags = 0; - - link->event = &(slot->Event); - link->hits = &(slot->hits); - - atomic_inc(&slot->count); - - INIT_LIST_HEAD(&(link->waitq[0].link)); - INIT_LIST_HEAD(&(link->waitq[1].link)); - - link->waitq[0].waitl = link->waitq[1].waitl = link; -} - - -/* - * cfs_waitlink_fini - * To finilize the wake link node - * - * Arguments: - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitlink_fini(cfs_waitlink_t *link) -{ - cfs_task_t * task = cfs_current(); - PTASK_SLOT slot = NULL; - - if (!task) { - /* should bugchk here */ - cfs_enter_debugger(); - return; - } - - slot = CONTAINING_RECORD(task, TASK_SLOT, 
task); - cfs_assert(slot->Magic == TASKSLT_MAGIC); - cfs_assert(link->magic == CFS_WAITLINK_MAGIC); - cfs_assert(link->waitq[0].waitq == NULL); - cfs_assert(link->waitq[1].waitq == NULL); - - atomic_dec(&slot->count); -} - - -/* - * cfs_waitq_add_internal - * To queue the wait link node to the wait queue - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * link: pointer to the cfs_waitlink_t structure - * int: queue no (Normal or Forward waitq) - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_add_internal(cfs_waitq_t *waitq, - cfs_waitlink_t *link, - __u32 waitqid ) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - LASSERT(link->magic == CFS_WAITLINK_MAGIC); - LASSERT(waitqid < CFS_WAITQ_CHANNELS); - - spin_lock(&(waitq->guard)); - LASSERT(link->waitq[waitqid].waitq == NULL); - link->waitq[waitqid].waitq = waitq; - if (link->flags & CFS_WAITQ_EXCLUSIVE) { - list_add_tail(&link->waitq[waitqid].link, &waitq->waiters); - } else { - list_add(&link->waitq[waitqid].link, &waitq->waiters); - } - spin_unlock(&(waitq->guard)); -} -/* - * cfs_waitq_add - * To queue the wait link node to the wait queue - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_add(cfs_waitq_t *waitq, - cfs_waitlink_t *link) -{ - cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_NORMAL); -} - -/* - * cfs_waitq_add_exclusive - * To set the wait link node to exclusive mode - * and queue it to the wait queue - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * link: pointer to the cfs_wait_link structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_add_exclusive( cfs_waitq_t *waitq, - cfs_waitlink_t *link) -{ - LASSERT(waitq != NULL); - LASSERT(link != NULL); - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - 
LASSERT(link->magic == CFS_WAITLINK_MAGIC); - - link->flags |= CFS_WAITQ_EXCLUSIVE; - cfs_waitq_add(waitq, link); -} - -/* - * cfs_waitq_forward - * To be determinated. - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_forward( cfs_waitlink_t *link, - cfs_waitq_t *waitq) -{ - cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_FORWARD); -} - -/* - * cfs_waitq_del - * To remove the wait link node from the waitq - * - * Arguments: - * waitq: pointer to the cfs_ waitq_t structure - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_del( cfs_waitq_t *waitq, - cfs_waitlink_t *link) -{ - int i = 0; - - LASSERT(waitq != NULL); - LASSERT(link != NULL); - - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - LASSERT(link->magic == CFS_WAITLINK_MAGIC); - - spin_lock(&(waitq->guard)); - - for (i=0; i < CFS_WAITQ_CHANNELS; i++) { - if (link->waitq[i].waitq == waitq) - break; - } - - if (i < CFS_WAITQ_CHANNELS) { - link->waitq[i].waitq = NULL; - list_del_init(&link->waitq[i].link); - } else { - cfs_enter_debugger(); - } - - spin_unlock(&(waitq->guard)); -} - -/* - * cfs_waitq_active - * Is the waitq active (not empty) ? - * - * Arguments: - * waitq: pointer to the cfs_ waitq_t structure - * - * Return Value: - * Zero: the waitq is empty - * Non-Zero: the waitq is active - * - * Notes: - * We always returns TRUE here, the same to Darwin. 
- */ - -int cfs_waitq_active(cfs_waitq_t *waitq) -{ - LASSERT(waitq != NULL); - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - - return (1); -} - -/* - * cfs_waitq_signal_nr - * To wake up all the non-exclusive tasks plus nr exclusive - * ones in the waitq - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * nr: number of exclusive tasks to be woken up - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - - -void cfs_waitq_signal_nr(cfs_waitq_t *waitq, int nr) -{ - int result; - cfs_waitlink_channel_t * scan; - - LASSERT(waitq != NULL); - LASSERT(waitq->magic == CFS_WAITQ_MAGIC); - - spin_lock(&waitq->guard); - - list_for_each_entry(scan, &waitq->waiters, cfs_waitlink_channel_t, link) { - - cfs_waitlink_t *waitl = scan->waitl; - - result = cfs_wake_event(waitl->event); - LASSERT( result == FALSE || result == TRUE ); - - if (result) { - atomic_inc(waitl->hits); - } - - if ((waitl->flags & CFS_WAITQ_EXCLUSIVE) && --nr == 0) - break; - } - - spin_unlock(&waitq->guard); - return; -} - -/* - * cfs_waitq_signal - * To wake up all the non-exclusive tasks and 1 exclusive - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_signal(cfs_waitq_t *waitq) -{ - cfs_waitq_signal_nr(waitq, 1); -} - - -/* - * cfs_waitq_broadcast - * To wake up all the tasks in the waitq - * - * Arguments: - * waitq: pointer to the cfs_waitq_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_broadcast(cfs_waitq_t *waitq) -{ - LASSERT(waitq != NULL); - LASSERT(waitq->magic ==CFS_WAITQ_MAGIC); - - cfs_waitq_signal_nr(waitq, 0); -} - -/* - * cfs_waitq_wait - * To wait on the link node until it is signaled. 
- * - * Arguments: - * link: pointer to the cfs_waitlink_t structure - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state) -{ - LASSERT(link != NULL); - LASSERT(link->magic == CFS_WAITLINK_MAGIC); - - if (atomic_read(link->hits) > 0) { - atomic_dec(link->hits); - LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00); - } else { - cfs_wait_event(link->event, 0); - } -} - -/* - * cfs_waitq_timedwait - * To wait the link node to be signaled with a timeout limit - * - * Arguments: - * link: pointer to the cfs_waitlink_t structure - * timeout: the timeout limitation - * - * Return Value: - * Woken up: return the difference of the current time and - * the timeout - * Timeout: return 0 - * - * Notes: - * What if it happens to be woken up at the just timeout time !? - */ - -cfs_duration_t cfs_waitq_timedwait( cfs_waitlink_t *link, - cfs_task_state_t state, - cfs_duration_t timeout) -{ - - if (atomic_read(link->hits) > 0) { - atomic_dec(link->hits); - LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00); - return TRUE; - } - - return (cfs_duration_t)cfs_wait_event(link->event, timeout); -} - - diff --git a/lnet/libcfs/winnt/winnt-tcpip.c b/lnet/libcfs/winnt/winnt-tcpip.c deleted file mode 100644 index d0c725cdc3fdeeffb919875f4175bf0cb9d7163b..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-tcpip.c +++ /dev/null @@ -1,6706 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LIBCFS - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include <lnet/lnet.h> - -#define TDILND_MODULE_NAME L"Tdilnd" - -ks_data_t ks_data; - -ULONG -ks_tdi_send_flags(ULONG SockFlags) -{ - ULONG TdiFlags = 0; - - if (cfs_is_flag_set(SockFlags, MSG_OOB)) { - cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED); - } - - if (cfs_is_flag_set(SockFlags, MSG_MORE)) { - cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL); - } - - if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) { - cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING); - } - - return TdiFlags; -} - -NTSTATUS -KsIrpCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - if (NULL != Context) { - KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE); - } - - return STATUS_MORE_PROCESSING_REQUIRED; - - UNREFERENCED_PARAMETER(DeviceObject); - UNREFERENCED_PARAMETER(Irp); -} - - -/* - * KsBuildTdiIrp - * Allocate a new IRP and initialize it to be issued to tdi - * - * Arguments: - * DeviceObject: device object created by the underlying - * TDI transport driver - * - * Return Value: - * PRIP: the allocated Irp in success or NULL in failure. - * - * NOTES: - * N/A - */ - -PIRP -KsBuildTdiIrp( - IN PDEVICE_OBJECT DeviceObject - ) -{ - PIRP Irp; - PIO_STACK_LOCATION IrpSp; - - // - // Allocating the IRP ... - // - - Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); - - if (NULL != Irp) { - - // - // Getting the Next Stack Location ... - // - - IrpSp = IoGetNextIrpStackLocation(Irp); - - // - // Initializing Irp ... 
- // - - IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL; - IrpSp->Parameters.DeviceIoControl.IoControlCode = 0; - } - - return Irp; -} - -/* - * KsSubmitTdiIrp - * Issue the Irp to the underlying tdi driver - * - * Arguments: - * DeviceObject: the device object created by TDI driver - * Irp: the I/O request packet to be processed - * bSynchronous: synchronous or not. If true, we need wait - * until the process is finished. - * Information: returned info - * - * Return Value: - * NTSTATUS: kernel status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsSubmitTdiIrp( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN BOOLEAN bSynchronous, - OUT PULONG Information - ) -{ - NTSTATUS Status; - KEVENT Event; - - if (bSynchronous) { - - KeInitializeEvent( - &Event, - SynchronizationEvent, - FALSE - ); - - - IoSetCompletionRoutine( - Irp, - KsIrpCompletionRoutine, - &Event, - TRUE, - TRUE, - TRUE - ); - } - - Status = IoCallDriver(DeviceObject, Irp); - - if (bSynchronous) { - - if (STATUS_PENDING == Status) { - - Status = KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); - } - - Status = Irp->IoStatus.Status; - - if (Information) { - *Information = (ULONG)(Irp->IoStatus.Information); - } - - Irp->MdlAddress = NULL; - IoFreeIrp(Irp); - } - - if (!NT_SUCCESS(Status)) { - - KsPrint((2, "KsSubmitTdiIrp: Error when submitting the Irp: Status = %xh (%s) ...\n", - Status, KsNtStatusToString(Status))); - } - - return (Status); -} - - - -/* - * KsOpenControl - * Open the Control Channel Object ... 
- * - * Arguments: - * DeviceName: the device name to be opened - * Handle: opened handle in success case - * FileObject: the fileobject of the device - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsOpenControl( - IN PUNICODE_STRING DeviceName, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; - - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - // - // Initializing ... - // - - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - // - // Creating the Transport Address Object ... - // - - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - 0, - FILE_ATTRIBUTE_NORMAL, - FILE_SHARE_READ | FILE_SHARE_WRITE, - FILE_OPEN, - 0, - NULL, - 0 - ); - - - if (NT_SUCCESS(Status)) { - - // - // Now Obtaining the FileObject of the Transport Address ... 
- // - - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - ZwClose(*Handle); - } - - } else { - - cfs_enter_debugger(); - } - - return (Status); -} - - -/* - * KsCloseControl - * Release the Control Channel Handle and FileObject - * - * Arguments: - * Handle: the channel handle to be released - * FileObject: the fileobject to be released - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsCloseControl( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - if (FileObject) { - - ObDereferenceObject(FileObject); - } - - if (Handle) { - - Status = ZwClose(Handle); - } - - ASSERT(NT_SUCCESS(Status)); - - return (Status); -} - - -/* - * KsOpenAddress - * Open the tdi address object - * - * Arguments: - * DeviceName: device name of the address object - * pAddress: tdi address of the address object - * AddressLength: length in bytes of the tdi address - * Handle: the newly opened handle - * FileObject: the newly opened fileobject - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsOpenAddress( - IN PUNICODE_STRING DeviceName, - IN PTRANSPORT_ADDRESS pAddress, - IN ULONG AddressLength, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; - - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; - - // - // Building EA for the Address Object to be Opened ... 
- // - - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH; - Ea->EaValueLength = (USHORT)AddressLength; - RtlCopyMemory( - &(Ea->EaName), - TdiTransportAddress, - Ea->EaNameLength + 1 - ); - RtlMoveMemory( - &(Ea->EaName[Ea->EaNameLength + 1]), - pAddress, - AddressLength - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) + - Ea->EaNameLength + AddressLength; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - - // - // Initializing ... - // - - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - // - // Creating the Transport Address Object ... - // - - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - 0, - FILE_ATTRIBUTE_NORMAL, - FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */ - FILE_OPEN, - 0, - Ea, - EaLength - ); - - - if (NT_SUCCESS(Status)) { - - // - // Now Obtaining the FileObject of the Transport Address ... 
- // - - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - ZwClose(*Handle); - } - - } else { - - cfs_enter_debugger(); - } - - return (Status); -} - -/* - * KsCloseAddress - * Release the Hanlde and FileObject of an opened tdi - * address object - * - * Arguments: - * Handle: the handle to be released - * FileObject: the fileobject to be released - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsCloseAddress( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject -) -{ - NTSTATUS Status = STATUS_SUCCESS; - - if (FileObject) { - - ObDereferenceObject(FileObject); - } - - if (Handle) { - - Status = ZwClose(Handle); - } - - ASSERT(NT_SUCCESS(Status)); - - return (Status); -} - - -/* - * KsOpenConnection - * Open a tdi connection object - * - * Arguments: - * DeviceName: device name of the connection object - * ConnectionContext: the connection context - * Handle: the newly opened handle - * FileObject: the newly opened fileobject - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsOpenConnection( - IN PUNICODE_STRING DeviceName, - IN CONNECTION_CONTEXT ConnectionContext, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; - - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; - - // - // Building EA for the Address Object to be Opened ... 
- // - - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH; - Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT); - RtlCopyMemory( - &(Ea->EaName), - TdiConnectionContext, - Ea->EaNameLength + 1 - ); - RtlMoveMemory( - &(Ea->EaName[Ea->EaNameLength + 1]), - &ConnectionContext, - sizeof(CONNECTION_CONTEXT) - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + - Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - - // - // Initializing ... - // - - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - // - // Creating the Connection Object ... - // - - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - NULL, - FILE_ATTRIBUTE_NORMAL, - 0, - FILE_OPEN, - 0, - Ea, - EaLength - ); - - - if (NT_SUCCESS(Status)) { - - // - // Now Obtaining the FileObject of the Transport Address ... 
- // - - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - ZwClose(*Handle); - } - - } else { - - cfs_enter_debugger(); - } - - return (Status); -} - -/* - * KsCloseConnection - * Release the Hanlde and FileObject of an opened tdi - * connection object - * - * Arguments: - * Handle: the handle to be released - * FileObject: the fileobject to be released - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsCloseConnection( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - if (FileObject) { - - ObDereferenceObject(FileObject); - } - - if (Handle) { - - Status = ZwClose(Handle); - } - - ASSERT(NT_SUCCESS(Status)); - - return (Status); -} - - -/* - * KsAssociateAddress - * Associate an address object with a connection object - * - * Arguments: - * AddressHandle: the handle of the address object - * ConnectionObject: the FileObject of the connection - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsAssociateAddress( - IN HANDLE AddressHandle, - IN PFILE_OBJECT ConnectionObject - ) -{ - NTSTATUS Status; - PDEVICE_OBJECT DeviceObject; - PIRP Irp; - - // - // Getting the DeviceObject from Connection FileObject - // - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - // - // Building Tdi Internal Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Assocating the Address Object with the Connection Object - // - - TdiBuildAssociateAddress( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - AddressHandle - ); - - // - // Calling the Transprot Driver with the Prepared Irp - // - - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); - } - - return (Status); -} - - -/* - * KsDisassociateAddress - * Disassociate the connection object (the relationship will - * the corresponding address object will be dismissed. ) - * - * Arguments: - * ConnectionObject: the FileObject of the connection - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsDisassociateAddress( - IN PFILE_OBJECT ConnectionObject - ) -{ - NTSTATUS Status; - PDEVICE_OBJECT DeviceObject; - PIRP Irp; - - // - // Getting the DeviceObject from Connection FileObject - // - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - // - // Building Tdi Internal Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Disassocating the Address Object with the Connection Object - // - - TdiBuildDisassociateAddress( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL - ); - - // - // Calling the Transprot Driver with the Prepared Irp - // - - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); - } - - return (Status); -} - - -/* - -// -// Connection Control Event Callbacks -// - -TDI_EVENT_CONNECT -TDI_EVENT_DISCONNECT -TDI_EVENT_ERROR - -// -// Tcp Event Callbacks -// - -TDI_EVENT_RECEIVE -TDI_EVENT_RECEIVE_EXPEDITED -TDI_EVENT_CHAINED_RECEIVE -TDI_EVENT_CHAINED_RECEIVE_EXPEDITED - -// -// Udp Event Callbacks -// - -TDI_EVENT_RECEIVE_DATAGRAM -TDI_EVENT_CHAINED_RECEIVE_DATAGRAM - -*/ - - -/* - * KsSetEventHandlers - * Set the tdi event callbacks with an address object - * - * Arguments: - * AddressObject: the FileObject of the address object - * EventContext: the parameter for the callbacks - * Handlers: the handlers indictor array - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * NOTES: - * N/A - */ - -NTSTATUS -KsSetEventHandlers( - IN PFILE_OBJECT AddressObject, // Address File Object - IN PVOID EventContext, // Context for Handlers - IN PKS_EVENT_HANDLERS Handlers // Handlers Indictor - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - PDEVICE_OBJECT DeviceObject; - USHORT i = 0; - - DeviceObject = IoGetRelatedDeviceObject(AddressObject); - - for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) { - - // - // Setup the tdi event callback handler if requested. - // - - if (Handlers->IsActive[i]) { - - PIRP Irp; - - // - // Building Tdi Internal Irp ... - // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Building the Irp to set the Event Handler ... 
- // - - TdiBuildSetEventHandler( - Irp, - DeviceObject, - AddressObject, - NULL, - NULL, - i, /* tdi event type */ - Handlers->Handler[i], /* tdi event handler */ - EventContext /* context for the handler */ - ); - - // - // Calling the Transprot Driver with the Prepared Irp - // - - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); - - // - // tcp/ip tdi does not support these two event callbacks - // - - if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE || - i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) { - cfs_enter_debugger(); - Status = STATUS_SUCCESS; - } - } - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - } - } - - -errorout: - - if (!NT_SUCCESS(Status)) { - - KsPrint((2, "KsSetEventHandlers: Error Status = %xh (%s)\n", - Status, KsNtStatusToString(Status) )); - } - - return (Status); -} - - - -/* - * KsQueryAddressInfo - * Query the address of the FileObject specified - * - * Arguments: - * FileObject: the FileObject to be queried - * AddressInfo: buffer to contain the address info - * AddressSize: length of the AddressInfo buffer - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsQueryAddressInfo( - PFILE_OBJECT FileObject, - PTDI_ADDRESS_INFO AddressInfo, - PULONG AddressSize - ) -{ - NTSTATUS Status = STATUS_UNSUCCESSFUL; - PIRP Irp = NULL; - PMDL Mdl; - PDEVICE_OBJECT DeviceObject; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - DeviceObject = IoGetRelatedDeviceObject(FileObject); - - RtlZeroMemory(AddressInfo, *(AddressSize)); - - // - // Allocating the Tdi Setting Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Locking the User Buffer / Allocating a MDL for it - // - - Status = KsLockUserBuffer( - AddressInfo, - FALSE, - *(AddressSize), - IoModifyAccess, - &Mdl - ); - - if (!NT_SUCCESS(Status)) { - - IoFreeIrp(Irp); - Irp = NULL; - } - } - - if (Irp) { - - LASSERT(NT_SUCCESS(Status)); - - TdiBuildQueryInformation( - Irp, - DeviceObject, - FileObject, - NULL, - NULL, - TDI_QUERY_ADDRESS_INFO, - Mdl - ); - - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - AddressSize - ); - - KsReleaseMdl(Mdl, FALSE); - } - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - //TDI_BUFFER_OVERFLOW - } - - return (Status); -} - -/* - * KsQueryProviderInfo - * Query the underlying transport device's information - * - * Arguments: - * TdiDeviceName: the transport device's name string - * ProviderInfo: TDI_PROVIDER_INFO struncture - * - * Return Value: - * NTSTATUS: Nt system status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsQueryProviderInfo( - PWSTR TdiDeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - PIRP Irp = NULL; - PMDL Mdl = NULL; - - UNICODE_STRING ControlName; - - HANDLE Handle; - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - - ULONG ProviderSize = 0; - - RtlInitUnicodeString(&ControlName, TdiDeviceName); - - // - // Open the Tdi Control Channel - // - - Status = KsOpenControl( - &ControlName, - &Handle, - &FileObject - ); - - if (!NT_SUCCESS(Status)) { - - KsPrint((2, "KsQueryProviderInfo: Fail to open the tdi control channel.\n")); - return (Status); - } - - // - // Obtain The Related Device Object - // - - DeviceObject = IoGetRelatedDeviceObject(FileObject); - - ProviderSize = sizeof(TDI_PROVIDER_INFO); - RtlZeroMemory(ProviderInfo, ProviderSize); - - // - // Allocating the Tdi Setting Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Locking the User Buffer / Allocating a MDL for it - // - - Status = KsLockUserBuffer( - ProviderInfo, - FALSE, - ProviderSize, - IoModifyAccess, - &Mdl - ); - - if (!NT_SUCCESS(Status)) { - - IoFreeIrp(Irp); - Irp = NULL; - } - } - - if (Irp) { - - LASSERT(NT_SUCCESS(Status)); - - TdiBuildQueryInformation( - Irp, - DeviceObject, - FileObject, - NULL, - NULL, - TDI_QUERY_PROVIDER_INFO, - Mdl - ); - - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - &ProviderSize - ); - - KsReleaseMdl(Mdl, FALSE); - } - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - //TDI_BUFFER_OVERFLOW - } - - KsCloseControl(Handle, FileObject); - - return (Status); -} - -/* - * KsQueryConnectionInfo - * Query the connection info of the FileObject specified - * (some statics data of the traffic) - * - * Arguments: - * FileObject: the FileObject to be queried - * ConnectionInfo: buffer to contain the connection info - * ConnectionSize: length of the ConnectionInfo buffer - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * NOTES: - * N/A - */ - -NTSTATUS -KsQueryConnectionInfo( - PFILE_OBJECT ConnectionObject, - PTDI_CONNECTION_INFO ConnectionInfo, - PULONG ConnectionSize - ) -{ - NTSTATUS Status = STATUS_UNSUCCESSFUL; - PIRP Irp = NULL; - PMDL Mdl; - PDEVICE_OBJECT DeviceObject; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - RtlZeroMemory(ConnectionInfo, *(ConnectionSize)); - - // - // Allocating the Tdi Query Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Locking the User Buffer / Allocating a MDL for it - // - - Status = KsLockUserBuffer( - ConnectionInfo, - FALSE, - *(ConnectionSize), - IoModifyAccess, - &Mdl - ); - - if (NT_SUCCESS(Status)) { - - IoFreeIrp(Irp); - Irp = NULL; - } - } - - if (Irp) { - - LASSERT(NT_SUCCESS(Status)); - - TdiBuildQueryInformation( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - TDI_QUERY_CONNECTION_INFO, - Mdl - ); - - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - ConnectionSize - ); - - KsReleaseMdl(Mdl, FALSE); - } - - return (Status); -} - - -/* - * KsInitializeTdiAddress - * Initialize the tdi addresss - * - * Arguments: - * pTransportAddress: tdi address to be initialized - * IpAddress: the ip address of object - * IpPort: the ip port of the object - * - * Return Value: - * ULONG: the total size of the tdi address - * - * NOTES: - * N/A - */ - -ULONG -KsInitializeTdiAddress( - IN OUT PTA_IP_ADDRESS pTransportAddress, - IN ULONG IpAddress, - IN USHORT IpPort - ) -{ - pTransportAddress->TAAddressCount = 1; - pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP; - pTransportAddress->Address[ 0 ].AddressType = TDI_ADDRESS_TYPE_IP; - pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort; - pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr = IpAddress; - - return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP); -} - -/* - * KsQueryTdiAddressLength - * Query the total size of the tdi address - * - * Arguments: - * pTransportAddress: tdi address to be queried - * - * Return Value: - * ULONG: the total size of the tdi address - * - * NOTES: - * N/A - */ - -ULONG -KsQueryTdiAddressLength( - PTRANSPORT_ADDRESS pTransportAddress - ) -{ - ULONG TotalLength = 0; - LONG i; - - PTA_ADDRESS UNALIGNED pTaAddress = NULL; - - ASSERT (NULL != pTransportAddress); - - TotalLength = 
FIELD_OFFSET(TRANSPORT_ADDRESS, Address) + - FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount; - - pTaAddress = (TA_ADDRESS UNALIGNED *)pTransportAddress->Address; - - for (i = 0; i < pTransportAddress->TAAddressCount; i++) - { - TotalLength += pTaAddress->AddressLength; - pTaAddress = (TA_ADDRESS UNALIGNED *)((PCHAR)pTaAddress + - FIELD_OFFSET(TA_ADDRESS,Address) + - pTaAddress->AddressLength ); - } - - return (TotalLength); -} - - -/* - * KsQueryIpAddress - * Query the ip address of the tdi object - * - * Arguments: - * FileObject: tdi object to be queried - * TdiAddress: TdiAddress buffer, to store the queried - * tdi ip address - * AddressLength: buffer length of the TdiAddress - * - * Return Value: - * ULONG: the total size of the tdi ip address - * - * NOTES: - * N/A - */ - -NTSTATUS -KsQueryIpAddress( - PFILE_OBJECT FileObject, - PVOID TdiAddress, - ULONG* AddressLength - ) -{ - NTSTATUS Status; - - PTDI_ADDRESS_INFO TdiAddressInfo; - ULONG Length; - - - // - // Maximum length of TDI_ADDRESSS_INFO with one TRANSPORT_ADDRESS - // - - Length = MAX_ADDRESS_LENGTH; - - TdiAddressInfo = (PTDI_ADDRESS_INFO) - ExAllocatePoolWithTag( - NonPagedPool, - Length, - 'KSAI' ); - - if (NULL == TdiAddressInfo) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - - Status = KsQueryAddressInfo( - FileObject, - TdiAddressInfo, - &Length - ); - -errorout: - - if (NT_SUCCESS(Status)) - { - if (*AddressLength < Length) { - - Status = STATUS_BUFFER_TOO_SMALL; - - } else { - - *AddressLength = Length; - RtlCopyMemory( - TdiAddress, - &(TdiAddressInfo->Address), - Length - ); - - Status = STATUS_SUCCESS; - } - - } else { - - } - - - if (NULL != TdiAddressInfo) { - - ExFreePool(TdiAddressInfo); - } - - return Status; -} - - -/* - * KsErrorEventHandler - * the common error event handler callback - * - * Arguments: - * TdiEventContext: should be the socket - * Status: the error code - * - * Return Value: - * Status: STATS_SUCCESS - * - * NOTES: - 
* We need not do anything in such a severe - * error case. System will process it for us. - */ - -NTSTATUS -KsErrorEventHandler( - IN PVOID TdiEventContext, - IN NTSTATUS Status - ) -{ - KsPrint((2, "KsErrorEventHandler called at Irql = %xh ...\n", - KeGetCurrentIrql())); - - cfs_enter_debugger(); - - return (STATUS_SUCCESS); -} - - -/* - * ks_set_handlers - * setup all the event handler callbacks - * - * Arguments: - * tconn: the tdi connecton object - * - * Return Value: - * int: ks error code - * - * NOTES: - * N/A - */ - -int -ks_set_handlers( - ksock_tconn_t * tconn - ) -{ - NTSTATUS status = STATUS_SUCCESS; - KS_EVENT_HANDLERS handlers; - - /* to make sure the address object is opened already */ - if (tconn->kstc_addr.FileObject == NULL) { - goto errorout; - } - - /* initialize the handlers indictor array. for sender and listenr, - there are different set of callbacks. for child, we just return. */ - - memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); - - SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler); - SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler); - SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler); - SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler); - SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler); - - // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler); - - if (tconn->kstc_type == kstt_listener) { - SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler); - } else if (tconn->kstc_type == kstt_child) { - goto errorout; - } - - /* set all the event callbacks */ - status = KsSetEventHandlers( - tconn->kstc_addr.FileObject, /* Address File Object */ - tconn, /* Event Context */ - &handlers /* Event callback handlers */ - ); - -errorout: - - return cfs_error_code(status); -} - - -/* - * ks_reset_handlers - * disable all the event handler callbacks 
(set to NULL) - * - * Arguments: - * tconn: the tdi connecton object - * - * Return Value: - * int: ks error code - * - * NOTES: - * N/A - */ - -int -ks_reset_handlers( - ksock_tconn_t * tconn - ) -{ - NTSTATUS status = STATUS_SUCCESS; - KS_EVENT_HANDLERS handlers; - - /* to make sure the address object is opened already */ - if (tconn->kstc_addr.FileObject == NULL) { - goto errorout; - } - - /* initialize the handlers indictor array. for sender and listenr, - there are different set of callbacks. for child, we just return. */ - - memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); - - SetEventHandler(handlers, TDI_EVENT_ERROR, NULL); - SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL); - SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL); - SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL); - SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL); - // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL); - - if (tconn->kstc_type == kstt_listener) { - SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL); - } else if (tconn->kstc_type == kstt_child) { - goto errorout; - } - - /* set all the event callbacks */ - status = KsSetEventHandlers( - tconn->kstc_addr.FileObject, /* Address File Object */ - tconn, /* Event Context */ - &handlers /* Event callback handlers */ - ); - -errorout: - - return cfs_error_code(status); -} - - -/* - * KsAcceptCompletionRoutine - * Irp completion routine for TdiBuildAccept (KsConnectEventHandler) - * - * Here system gives us a chance to check the conneciton is built - * ready or not. - * - * Arguments: - * DeviceObject: the device object of the transport driver - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsAcceptCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - ksock_tconn_t * child = (ksock_tconn_t *) Context; - ksock_tconn_t * parent = child->child.kstc_parent; - - KsPrint((2, "KsAcceptCompletionRoutine: called at Irql: %xh\n", - KeGetCurrentIrql() )); - - KsPrint((2, "KsAcceptCompletionRoutine: Context = %xh Status = %xh\n", - Context, Irp->IoStatus.Status)); - - LASSERT(child->kstc_type == kstt_child); - - spin_lock(&(child->kstc_lock)); - - LASSERT(parent->kstc_state == ksts_listening); - LASSERT(child->kstc_state == ksts_connecting); - - if (NT_SUCCESS(Irp->IoStatus.Status)) { - - child->child.kstc_accepted = TRUE; - - child->kstc_state = ksts_connected; - - /* wake up the daemon thread which waits on this event */ - KeSetEvent( - &(parent->listener.kstc_accept_event), - 0, - FALSE - ); - - spin_unlock(&(child->kstc_lock)); - - KsPrint((2, "KsAcceptCompletionRoutine: Get %xh now signal the event ...\n", parent)); - - } else { - - /* re-use this child connecton */ - child->child.kstc_accepted = FALSE; - child->child.kstc_busy = FALSE; - child->kstc_state = ksts_associated; - - spin_unlock(&(child->kstc_lock)); - } - - /* now free the Irp */ - IoFreeIrp(Irp); - - /* drop the refer count of the child */ - ks_put_tconn(child); - - return (STATUS_MORE_PROCESSING_REQUIRED); -} - - -/* - * ks_get_vacancy_backlog - * Get a vacancy listeing child from the backlog list - * - * Arguments: - * parent: the listener daemon connection - * - * Return Value: - * the child listening connection or NULL in failure - * - * Notes - * Parent's lock should be acquired before calling. 
- */ - -ksock_tconn_t * -ks_get_vacancy_backlog( - ksock_tconn_t * parent - ) -{ - ksock_tconn_t * child; - - LASSERT(parent->kstc_type == kstt_listener); - LASSERT(parent->kstc_state == ksts_listening); - - if (list_empty(&(parent->listener.kstc_listening.list))) { - - child = NULL; - - } else { - - struct list_head * tmp; - - /* check the listening queue and try to get a free connecton */ - - list_for_each(tmp, &(parent->listener.kstc_listening.list)) { - child = list_entry (tmp, ksock_tconn_t, child.kstc_link); - spin_lock(&(child->kstc_lock)); - - if (!child->child.kstc_busy) { - LASSERT(child->kstc_state == ksts_associated); - child->child.kstc_busy = TRUE; - spin_unlock(&(child->kstc_lock)); - break; - } else { - spin_unlock(&(child->kstc_lock)); - child = NULL; - } - } - } - - return child; -} - -ks_addr_slot_t * -KsSearchIpAddress(PUNICODE_STRING DeviceName) -{ - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - - spin_lock(&ks_data.ksnd_addrs_lock); - - list = ks_data.ksnd_addrs_list.Flink; - while (list != &ks_data.ksnd_addrs_list) { - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - if (RtlCompareUnicodeString( - DeviceName, - &slot->devname, - TRUE) == 0) { - break; - } - list = list->Flink; - slot = NULL; - } - - spin_unlock(&ks_data.ksnd_addrs_lock); - - return slot; -} - -void -KsCleanupIpAddresses() -{ - spin_lock(&ks_data.ksnd_addrs_lock); - - while (!IsListEmpty(&ks_data.ksnd_addrs_list)) { - - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - - list = RemoveHeadList(&ks_data.ksnd_addrs_list); - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - cfs_free(slot); - ks_data.ksnd_naddrs--; - } - - cfs_assert(ks_data.ksnd_naddrs == 0); - spin_unlock(&ks_data.ksnd_addrs_lock); -} - -VOID -KsAddAddressHandler( - IN PTA_ADDRESS Address, - IN PUNICODE_STRING DeviceName, - IN PTDI_PNP_CONTEXT Context - ) -{ - PTDI_ADDRESS_IP IpAddress = NULL; - - if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && - Address->AddressLength == 
TDI_ADDRESS_LENGTH_IP ) { - - ks_addr_slot_t * slot = NULL; - - IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; - KsPrint((1, "KsAddAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", - DeviceName, Context, IpAddress->in_addr, - (IpAddress->in_addr & 0xFF000000) >> 24, - (IpAddress->in_addr & 0x00FF0000) >> 16, - (IpAddress->in_addr & 0x0000FF00) >> 8, - (IpAddress->in_addr & 0x000000FF) >> 0 )); - - slot = KsSearchIpAddress(DeviceName); - - if (slot != NULL) { - slot->up = TRUE; - slot->ip_addr = ntohl(IpAddress->in_addr); - } else { - slot = cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO); - if (slot != NULL) { - spin_lock(&ks_data.ksnd_addrs_lock); - InsertTailList(&ks_data.ksnd_addrs_list, &slot->link); - sprintf(slot->iface, "eth%d", ks_data.ksnd_naddrs++); - slot->ip_addr = ntohl(IpAddress->in_addr); - slot->up = TRUE; - RtlMoveMemory(&slot->buffer[0], DeviceName->Buffer, DeviceName->Length); - slot->devname.Length = DeviceName->Length; - slot->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR); - slot->devname.Buffer = slot->buffer; - spin_unlock(&ks_data.ksnd_addrs_lock); - } - } - } -} - -VOID -KsDelAddressHandler( - IN PTA_ADDRESS Address, - IN PUNICODE_STRING DeviceName, - IN PTDI_PNP_CONTEXT Context - ) -{ - PTDI_ADDRESS_IP IpAddress = NULL; - - if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && - Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) { - - ks_addr_slot_t * slot = NULL; - - slot = KsSearchIpAddress(DeviceName); - - if (slot != NULL) { - slot->up = FALSE; - } - - IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; - KsPrint((1, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", - DeviceName, Context, IpAddress->in_addr, - (IpAddress->in_addr & 0xFF000000) >> 24, - (IpAddress->in_addr & 0x00FF0000) >> 16, - (IpAddress->in_addr & 0x0000FF00) >> 8, - (IpAddress->in_addr & 0x000000FF) >> 0 )); - } -} - -NTSTATUS -KsRegisterPnpHandlers() -{ - TDI20_CLIENT_INTERFACE_INFO 
ClientInfo; - - /* initialize the global ks_data members */ - RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME); - spin_lock_init(&ks_data.ksnd_addrs_lock); - InitializeListHead(&ks_data.ksnd_addrs_list); - - /* register the pnp handlers */ - RtlZeroMemory(&ClientInfo, sizeof(ClientInfo)); - ClientInfo.TdiVersion = TDI_CURRENT_VERSION; - - ClientInfo.ClientName = &ks_data.ksnd_client_name; - ClientInfo.AddAddressHandlerV2 = KsAddAddressHandler; - ClientInfo.DelAddressHandlerV2 = KsDelAddressHandler; - - return TdiRegisterPnPHandlers(&ClientInfo, sizeof(ClientInfo), - &ks_data.ksnd_pnp_handle); -} - -VOID -KsDeregisterPnpHandlers() -{ - if (ks_data.ksnd_pnp_handle) { - - /* De-register the pnp handlers */ - - TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle); - ks_data.ksnd_pnp_handle = NULL; - - /* cleanup all the ip address slots */ - KsCleanupIpAddresses(); - } -} - -/* - * KsConnectEventHandler - * Connect event handler event handler, called by the underlying TDI - * transport in response to an incoming request to the listening daemon. - * - * it will grab a vacancy backlog from the children tconn list, and - * build an acception Irp with it, then transfer the Irp to TDI driver. - * - * Arguments: - * TdiEventContext: the tdi connnection object of the listening daemon - * ...... 
- * - * Return Value: - * Nt kernel status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsConnectEventHandler( - IN PVOID TdiEventContext, - IN LONG RemoteAddressLength, - IN PVOID RemoteAddress, - IN LONG UserDataLength, - IN PVOID UserData, - IN LONG OptionsLength, - IN PVOID Options, - OUT CONNECTION_CONTEXT * ConnectionContext, - OUT PIRP * AcceptIrp - ) -{ - ksock_tconn_t * parent; - ksock_tconn_t * child; - - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - NTSTATUS Status; - - PIRP Irp = NULL; - PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; - - KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql())); - parent = (ksock_tconn_t *) TdiEventContext; - - LASSERT(parent->kstc_type == kstt_listener); - - spin_lock(&(parent->kstc_lock)); - - if (parent->kstc_state == ksts_listening) { - - /* allocate a new ConnectionInfo to backup the peer's info */ - - ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( - NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + - RemoteAddressLength, 'iCsK' ); - - if (NULL == ConnectionInfo) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); - goto errorout; - } - - /* initializing ConnectionInfo structure ... 
*/ - - ConnectionInfo->UserDataLength = UserDataLength; - ConnectionInfo->UserData = UserData; - ConnectionInfo->OptionsLength = OptionsLength; - ConnectionInfo->Options = Options; - ConnectionInfo->RemoteAddressLength = RemoteAddressLength; - ConnectionInfo->RemoteAddress = ConnectionInfo + 1; - - RtlCopyMemory( - ConnectionInfo->RemoteAddress, - RemoteAddress, - RemoteAddressLength - ); - - /* get the vacancy listening child tdi connections */ - - child = ks_get_vacancy_backlog(parent); - - if (child) { - - spin_lock(&(child->kstc_lock)); - child->child.kstc_info.ConnectionInfo = ConnectionInfo; - child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress; - child->kstc_state = ksts_connecting; - spin_unlock(&(child->kstc_lock)); - - } else { - - KsPrint((2, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent)); - - Status = STATUS_INSUFFICIENT_RESOURCES; - - goto errorout; - } - - FileObject = child->child.kstc_info.FileObject; - DeviceObject = IoGetRelatedDeviceObject (FileObject); - - Irp = KsBuildTdiIrp(DeviceObject); - - TdiBuildAccept( - Irp, - DeviceObject, - FileObject, - KsAcceptCompletionRoutine, - child, - NULL, - NULL - ); - - IoSetNextIrpStackLocation(Irp); - - /* grap the refer of the child tdi connection */ - ks_get_tconn(child); - - Status = STATUS_MORE_PROCESSING_REQUIRED; - - *AcceptIrp = Irp; - *ConnectionContext = child; - - } else { - - Status = STATUS_CONNECTION_REFUSED; - goto errorout; - } - - spin_unlock(&(parent->kstc_lock)); - - return Status; - -errorout: - - spin_unlock(&(parent->kstc_lock)); - - { - *AcceptIrp = NULL; - *ConnectionContext = NULL; - - if (ConnectionInfo) { - - ExFreePool(ConnectionInfo); - } - - if (Irp) { - - IoFreeIrp (Irp); - } - } - - return Status; -} - -/* - * KsDisconnectCompletionRoutine - * the Irp completion routine for TdiBuildDisconect - * - * We just signal the event and return MORE_PRO... to - * let the caller take the responsibility of the Irp. 
- * - * Arguments: - * DeviceObject: the device object of the transport - * Irp: the Irp is being completed. - * Context: the event specified by the caller - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsDisconectCompletionRoutine ( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - - KeSetEvent((PKEVENT) Context, 0, FALSE); - - return STATUS_MORE_PROCESSING_REQUIRED; - - UNREFERENCED_PARAMETER(DeviceObject); -} - - -/* - * KsDisconnectHelper - * the routine to be executed in the WorkItem procedure - * this routine is to disconnect a tdi connection - * - * Arguments: - * Workitem: the context transferred to the workitem - * - * Return Value: - * N/A - * - * Notes: - * tconn is already referred in abort_connecton ... - */ - -VOID -KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem) -{ - ksock_tconn_t * tconn = WorkItem->tconn; - - DbgPrint("KsDisconnectHelper: disconnecting tconn=%p\n", tconn); - ks_disconnect_tconn(tconn, WorkItem->Flags); - - KeSetEvent(&(WorkItem->Event), 0, FALSE); - - spin_lock(&(tconn->kstc_lock)); - cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); -} - - -/* - * KsDisconnectEventHandler - * Disconnect event handler event handler, called by the underlying TDI transport - * in response to an incoming disconnection notification from a remote node. - * - * Arguments: - * ConnectionContext: tdi connnection object - * DisconnectFlags: specifies the nature of the disconnection - * ...... 
- * - * Return Value: - * Nt kernel status code - * - * Notes: - * N/A - */ - - -NTSTATUS -KsDisconnectEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN LONG DisconnectDataLength, - IN PVOID DisconnectData, - IN LONG DisconnectInformationLength, - IN PVOID DisconnectInformation, - IN ULONG DisconnectFlags - ) -{ - ksock_tconn_t * tconn; - NTSTATUS Status; - PKS_DISCONNECT_WORKITEM WorkItem; - - tconn = (ksock_tconn_t *)ConnectionContext; - - KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n", - KeGetCurrentIrql() )); - - KsPrint((2, "tconn = %x DisconnectFlags= %xh\n", - tconn, DisconnectFlags)); - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - WorkItem = &(tconn->kstc_disconnect); - - if (tconn->kstc_state != ksts_connected) { - - Status = STATUS_SUCCESS; - - } else { - - if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) { - - Status = STATUS_REMOTE_DISCONNECT; - - } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) { - - Status = STATUS_GRACEFUL_DISCONNECT; - } - - if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { - - ks_get_tconn(tconn); - - WorkItem->Flags = DisconnectFlags; - WorkItem->tconn = tconn; - - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - - /* queue the workitem to call */ - ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue); - } - } - - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (Status); -} - -NTSTATUS -KsTcpReceiveCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ) -{ - NTSTATUS Status = Irp->IoStatus.Status; - - if (NT_SUCCESS(Status)) { - - ksock_tconn_t *tconn = Context->tconn; - - PKS_TSDU_DAT KsTsduDat = Context->CompletionContext; - PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext; - - KsPrint((1, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n", - Context->KsTsduMgr->TotalBytes )); - - spin_lock(&(tconn->kstc_lock)); - - if (TSDU_TYPE_DAT == 
KsTsduDat->TsduType) { - if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) { - cfs_clear_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING); - } else { - cfs_enter_debugger(); - } - } else { - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) { - cfs_clear_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING); - } else { - cfs_enter_debugger(); - } - } - - spin_unlock(&(tconn->kstc_lock)); - - /* wake up the thread waiting for the completion of this Irp */ - KeSetEvent(Context->Event, 0, FALSE); - - /* re-active the ks connection and wake up the scheduler */ - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - Context->KsTsduMgr->TotalBytes ); - } - - } else { - - /* un-expected errors occur, we must abort the connection */ - ks_abort_tconn(Context->tconn); - } - - if (Context) { - - /* Freeing the Context structure... */ - ExFreePool(Context); - Context = NULL; - } - - - /* free the Irp */ - if (Irp) { - IoFreeIrp(Irp); - } - - return (Status); -} - - -/* - * KsTcpCompletionRoutine - * the Irp completion routine for TdiBuildSend and TdiBuildReceive ... - * We need call the use's own CompletionRoutine if specified. Or - * it's a synchronous case, we need signal the event. - * - * Arguments: - * DeviceObject: the device object of the transport - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsTcpCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - if (Context) { - - PKS_TCP_COMPLETION_CONTEXT CompletionContext = NULL; - ksock_tconn_t * tconn = NULL; - - CompletionContext = (PKS_TCP_COMPLETION_CONTEXT) Context; - tconn = CompletionContext->tconn; - - /* release the chained mdl */ - KsReleaseMdl(Irp->MdlAddress, FALSE); - Irp->MdlAddress = NULL; - - if (CompletionContext->CompletionRoutine) { - - if ( CompletionContext->bCounted && - InterlockedDecrement(&CompletionContext->ReferCount) != 0 ) { - goto errorout; - } - - // - // Giving control to user specified CompletionRoutine ... - // - - CompletionContext->CompletionRoutine( - Irp, - CompletionContext - ); - - } else { - - // - // Signaling the Event ... - // - - KeSetEvent(CompletionContext->Event, 0, FALSE); - } - - /* drop the reference count of the tconn object */ - ks_put_tconn(tconn); - - } else { - - cfs_enter_debugger(); - } - -errorout: - - return STATUS_MORE_PROCESSING_REQUIRED; -} - -/* - * KsTcpSendCompletionRoutine - * the user specified Irp completion routine for asynchronous - * data transmission requests. - * - * It will do th cleanup job of the ksock_tx_t and wake up the - * ks scheduler thread - * - * Arguments: - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsTcpSendCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ) -{ - NTSTATUS Status = Irp->IoStatus.Status; - ULONG rc = Irp->IoStatus.Information; - ksock_tconn_t * tconn = Context->tconn; - PKS_TSDUMGR KsTsduMgr = Context->KsTsduMgr; - - ENTRY; - - LASSERT(tconn) ; - - if (NT_SUCCESS(Status)) { - - if (Context->bCounted) { - PVOID tx = Context->CompletionContext; - - ASSERT(tconn->kstc_update_tx != NULL); - - /* update the tx, rebasing the kiov or iov pointers */ - tx = tconn->kstc_update_tx(tconn, tx, rc); - - /* update the KsTsudMgr total bytes */ - spin_lock(&tconn->kstc_lock); - KsTsduMgr->TotalBytes -= rc; - spin_unlock(&tconn->kstc_lock); - - /* - * now it's time to re-queue the conns into the - * scheduler queue and wake the scheduler thread. - */ - - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, TRUE, tx, 0); - } - - } else { - - PKS_TSDU KsTsdu = Context->CompletionContext; - PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext2; - PKS_TSDU_DAT KsTsduDat = Context->CompletionContext2; - - spin_lock(&tconn->kstc_lock); - /* This is bufferred sending ... */ - ASSERT(KsTsduBuf->StartOffset == 0); - - if (KsTsduBuf->DataLength > Irp->IoStatus.Information) { - /* not fully sent .... 
we have to abort the connection */ - spin_unlock(&tconn->kstc_lock); - ks_abort_tconn(tconn); - goto errorout; - } - - if (KsTsduBuf->TsduType == TSDU_TYPE_BUF) { - /* free the buffer */ - ExFreePool(KsTsduBuf->UserBuffer); - KsTsduMgr->TotalBytes -= KsTsduBuf->DataLength; - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - } else if (KsTsduDat->TsduType == TSDU_TYPE_DAT) { - KsTsduMgr->TotalBytes -= KsTsduDat->DataLength; - KsTsdu->StartOffset += KsTsduDat->TotalLength; - } else { - cfs_enter_debugger(); /* shoult not get here */ - } - - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { - - list_del(&KsTsdu->Link); - KsTsduMgr->NumOfTsdu--; - KsPutKsTsdu(KsTsdu); - } - - spin_unlock(&tconn->kstc_lock); - } - - } else { - - /* cfs_enter_debugger(); */ - - /* - * for the case that the transmission is ussuccessful, - * we need abort the tdi connection, but not destroy it. - * the socknal conn will drop the refer count, then the - * tdi connection will be freed. - */ - - ks_abort_tconn(tconn); - } - -errorout: - - /* freeing the Context structure... */ - - if (Context) { - ExFreePool(Context); - Context = NULL; - } - - /* it's our duty to free the Irp. 
*/ - - if (Irp) { - IoFreeIrp(Irp); - Irp = NULL; - } - - EXIT; - - return Status; -} - -/* - * Normal receive event handler - * - * It will move data from system Tsdu to our TsduList - */ - -NTSTATUS -KsTcpReceiveEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ) -{ - NTSTATUS Status; - - ksock_tconn_t * tconn; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - - BOOLEAN bIsExpedited; - BOOLEAN bIsCompleteTsdu; - - BOOLEAN bNewTsdu = FALSE; - BOOLEAN bNewBuff = FALSE; - - PCHAR Buffer = NULL; - - PIRP Irp = NULL; - PMDL Mdl = NULL; - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - - ULONG BytesReceived = 0; - - PKS_TCP_COMPLETION_CONTEXT context = NULL; - - - tconn = (ksock_tconn_t *) ConnectionContext; - - ks_get_tconn(tconn); - - /* check whether the whole body of payload is received or not */ - if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) && - (BytesIndicated == BytesAvailable) ) { - bIsCompleteTsdu = TRUE; - } else { - bIsCompleteTsdu = FALSE; - } - - bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); - - KsPrint((2, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", BytesIndicated, BytesAvailable)); - KsPrint((2, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited )); - - spin_lock(&(tconn->kstc_lock)); - - /* check whether we are conntected or not listener ¡Â*/ - if ( !((tconn->kstc_state == ksts_connected) && - (tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child))) { - - *BytesTaken = BytesIndicated; - - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (STATUS_SUCCESS); - } - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - 
LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - - /* if the Tsdu is even larger than the biggest Tsdu, we have - to allocate new buffer and use TSDU_TYOE_BUF to store it */ - - if ( KS_TSDU_STRU_SIZE(BytesAvailable) > ks_data.ksnd_tsdu_size - - KS_DWORD_ALIGN(sizeof(KS_TSDU))) { - bNewBuff = TRUE; - } - - /* retrieve the latest Tsdu buffer form TsduMgr - list if the list is not empty. */ - - if (list_empty(&(KsTsduMgr->TsduList))) { - - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; - - } else { - - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); - - /* if this Tsdu does not contain enough space, we need - allocate a new Tsdu queue. */ - - if (bNewBuff) { - if ( KsTsdu->LastOffset + sizeof(KS_TSDU_BUF) > - KsTsdu->TotalLength ) { - KsTsdu = NULL; - } - } else { - if ( KS_TSDU_STRU_SIZE(BytesAvailable) > - KsTsdu->TotalLength - KsTsdu->LastOffset ) { - KsTsdu = NULL; - } - } - } - - /* allocating the buffer for TSDU_TYPE_BUF */ - if (bNewBuff) { - Buffer = ExAllocatePool(NonPagedPool, BytesAvailable); - if (NULL == Buffer) { - /* there's no enough memory for us. We just try to - receive maximum bytes with a new Tsdu */ - bNewBuff = FALSE; - KsTsdu = NULL; - } - } - - /* allocate a new Tsdu in case we are not statisfied. 
*/ - - if (NULL == KsTsdu) { - - KsTsdu = KsAllocateKsTsdu(); - - if (NULL == KsTsdu) { - goto errorout; - } else { - bNewTsdu = TRUE; - } - } - - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - if (bNewBuff) { - - /* setup up the KS_TSDU_BUF record */ - - KsTsduBuf->TsduType = TSDU_TYPE_BUF; - KsTsduBuf->TsduFlags = 0; - KsTsduBuf->StartOffset = 0; - KsTsduBuf->UserBuffer = Buffer; - KsTsduBuf->DataLength = BytesReceived = BytesAvailable; - - KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); - - } else { - - /* setup the KS_TSDU_DATA to contain all the messages */ - - KsTsduDat->TsduType = TSDU_TYPE_DAT; - KsTsduDat->TsduFlags = 0; - - if ( KsTsdu->TotalLength - KsTsdu->LastOffset >= - KS_TSDU_STRU_SIZE(BytesAvailable) ) { - BytesReceived = BytesAvailable; - } else { - BytesReceived = KsTsdu->TotalLength - KsTsdu->LastOffset - - FIELD_OFFSET(KS_TSDU_DAT, Data); - BytesReceived &= (~((ULONG)3)); - } - KsTsduDat->DataLength = BytesReceived; - KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE(BytesReceived); - KsTsduDat->StartOffset = 0; - - Buffer = &KsTsduDat->Data[0]; - - KsTsdu->LastOffset += KsTsduDat->TotalLength; - } - - KsTsduMgr->TotalBytes += BytesReceived; - - if (bIsCompleteTsdu) { - - /* It's a complete receive, we just move all - the data from system to our Tsdu */ - - RtlMoveMemory( - Buffer, - Tsdu, - BytesReceived - ); - - *BytesTaken = BytesReceived; - Status = STATUS_SUCCESS; - - if (bNewTsdu) { - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } - - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); - - /* re-active the ks connection and wake up the scheduler */ - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - KsTsduMgr->TotalBytes ); - } - - } else { - - /* there's still data in tdi internal queue, we need issue a new - Irp to receive all of them. 
first allocate the tcp context */ - - context = ExAllocatePoolWithTag( - NonPagedPool, - sizeof(KS_TCP_COMPLETION_CONTEXT), - 'cTsK'); - - if (!context) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - /* setup the context */ - RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT)); - - context->tconn = tconn; - context->CompletionRoutine = KsTcpReceiveCompletionRoutine; - context->CompletionContext = KsTsdu; - context->CompletionContext = bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat; - context->KsTsduMgr = KsTsduMgr; - context->Event = &(KsTsduMgr->Event); - - if (tconn->kstc_type == kstt_sender) { - FileObject = tconn->sender.kstc_info.FileObject; - } else { - FileObject = tconn->child.kstc_info.FileObject; - } - - DeviceObject = IoGetRelatedDeviceObject(FileObject); - - /* build new tdi Irp and setup it. */ - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - goto errorout; - } - - Status = KsLockUserBuffer( - Buffer, - FALSE, - BytesReceived, - IoModifyAccess, - &Mdl - ); - - if (!NT_SUCCESS(Status)) { - goto errorout; - } - - TdiBuildReceive( - Irp, - DeviceObject, - FileObject, - KsTcpCompletionRoutine, - context, - Mdl, - ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED), - BytesReceived - ); - - IoSetNextIrpStackLocation(Irp); - - /* return the newly built Irp to transport driver, - it will process it to receive all the data */ - - *IoRequestPacket = Irp; - *BytesTaken = 0; - - if (bNewTsdu) { - - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } - - if (bNewBuff) { - cfs_set_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING); - } else { - cfs_set_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING); - } - ks_get_tconn(tconn); - Status = STATUS_MORE_PROCESSING_REQUIRED; - } - - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (Status); - -errorout: - - spin_unlock(&(tconn->kstc_lock)); - - if (bNewTsdu && (KsTsdu != NULL)) { - KsFreeKsTsdu(KsTsdu); - } - - if 
(Mdl) { - KsReleaseMdl(Mdl, FALSE); - } - - if (Irp) { - IoFreeIrp(Irp); - } - - if (context) { - ExFreePool(context); - } - - ks_abort_tconn(tconn); - ks_put_tconn(tconn); - - *BytesTaken = BytesAvailable; - Status = STATUS_SUCCESS; - - return (Status); -} - -/* - * Expedited receive event handler - */ - -NTSTATUS -KsTcpReceiveExpeditedEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ) -{ - return KsTcpReceiveEventHandler( - TdiEventContext, - ConnectionContext, - ReceiveFlags | TDI_RECEIVE_EXPEDITED, - BytesIndicated, - BytesAvailable, - BytesTaken, - Tsdu, - IoRequestPacket - ); -} - - -/* - * Bulk receive event handler - * - * It will queue all the system Tsdus to our TsduList. - * Then later ks_recv_mdl will release them. - */ - -NTSTATUS -KsTcpChainedReceiveEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ) -{ - - NTSTATUS Status; - - ksock_tconn_t * tconn; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_MDL KsTsduMdl; - - BOOLEAN bIsExpedited; - BOOLEAN bNewTsdu = FALSE; - - tconn = (ksock_tconn_t *) ConnectionContext; - - bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); - - KsPrint((2, "KsTcpChainedReceive: ReceiveLength = %xh bIsExpedited = %d\n", ReceiveLength, bIsExpedited)); - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - /* check whether we are conntected or not listener ¡Â*/ - if ( !((tconn->kstc_state == ksts_connected) && - (tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child))) { - - 
spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (STATUS_SUCCESS); - } - - /* get the latest Tsdu buffer form TsduMgr list. - just set NULL if the list is empty. */ - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - - if (list_empty(&(KsTsduMgr->TsduList))) { - - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; - - } else { - - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - if (sizeof(KS_TSDU_MDL) > KsTsdu->TotalLength - KsTsdu->LastOffset) { - KsTsdu = NULL; - } - } - - /* if there's no Tsdu or the free size is not enough for this - KS_TSDU_MDL structure. We need re-allocate a new Tsdu. */ - - if (NULL == KsTsdu) { - - KsTsdu = KsAllocateKsTsdu(); - - if (NULL == KsTsdu) { - goto errorout; - } else { - bNewTsdu = TRUE; - } - } - - /* just queue the KS_TSDU_MDL to the Tsdu buffer */ - - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - KsTsduMdl->TsduType = TSDU_TYPE_MDL; - KsTsduMdl->DataLength = ReceiveLength; - KsTsduMdl->StartOffset = StartingOffset; - KsTsduMdl->Mdl = Tsdu; - KsTsduMdl->Descriptor = TsduDescriptor; - - KsTsdu->LastOffset += sizeof(KS_TSDU_MDL); - KsTsduMgr->TotalBytes += ReceiveLength; - - KsPrint((2, "KsTcpChainedReceiveEventHandler: Total %xh bytes.\n", - KsTsduMgr->TotalBytes )); - - Status = STATUS_PENDING; - - /* attach it to the TsduMgr list if the Tsdu is newly created. 
*/ - if (bNewTsdu) { - - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } - - spin_unlock(&(tconn->kstc_lock)); - - /* wake up the threads waiing in ks_recv_mdl */ - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); - - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - KsTsduMgr->TotalBytes ); - } - - ks_put_tconn(tconn); - - /* Return STATUS_PENDING to system because we are still - owning the MDL resources. ks_recv_mdl is expected - to free the MDL resources. */ - - return (Status); - -errorout: - - spin_unlock(&(tconn->kstc_lock)); - - if (bNewTsdu && (KsTsdu != NULL)) { - KsFreeKsTsdu(KsTsdu); - } - - /* abort the tdi connection */ - ks_abort_tconn(tconn); - ks_put_tconn(tconn); - - - Status = STATUS_SUCCESS; - - return (Status); -} - - -/* - * Expedited & Bulk receive event handler - */ - -NTSTATUS -KsTcpChainedReceiveExpeditedEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ) -{ - return KsTcpChainedReceiveEventHandler( - TdiEventContext, - ConnectionContext, - ReceiveFlags | TDI_RECEIVE_EXPEDITED, - ReceiveLength, - StartingOffset, - Tsdu, - TsduDescriptor ); -} - - -VOID -KsPrintProviderInfo( - PWSTR DeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ) -{ - KsPrint((2, "%ws ProviderInfo:\n", DeviceName)); - - KsPrint((2, " Version : 0x%4.4X\n", ProviderInfo->Version )); - KsPrint((2, " MaxSendSize : %d\n", ProviderInfo->MaxSendSize )); - KsPrint((2, " MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData )); - KsPrint((2, " MaxDatagramSize : %d\n", ProviderInfo->MaxDatagramSize )); - KsPrint((2, " ServiceFlags : 0x%8.8X\n", ProviderInfo->ServiceFlags )); - - if (ProviderInfo->ServiceFlags & 
TDI_SERVICE_CONNECTION_MODE) { - KsPrint((2, " CONNECTION_MODE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) { - KsPrint((2, " ORDERLY_RELEASE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) { - KsPrint((2, " CONNECTIONLESS_MODE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) { - KsPrint((2, " ERROR_FREE_DELIVERY\n")); - } - - if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) { - KsPrint((2, " SECURITY_LEVEL\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) { - KsPrint((2, " BROADCAST_SUPPORTED\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) { - KsPrint((2, " MULTICAST_SUPPORTED\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) { - KsPrint((2, " DELAYED_ACCEPTANCE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) { - KsPrint((2, " EXPEDITED_DATA\n")); - } - - if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) { - KsPrint((2, " INTERNAL_BUFFERING\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) { - KsPrint((2, " ROUTE_DIRECTED\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) { - KsPrint((2, " NO_ZERO_LENGTH\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) { - KsPrint((2, " POINT_TO_POINT\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) { - KsPrint((2, " MESSAGE_MODE\n")); - } - - if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) { - KsPrint((2, " HALF_DUPLEX\n")); - } - - KsPrint((2, " MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData )); - KsPrint((2, " MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData )); - KsPrint((2, " NumberOfResources : %d\n", ProviderInfo->NumberOfResources )); -} - - -/* - * KsAllocateKsTsdu - * Reuse a Tsdu from the freelist or allocate a new Tsdu - * from the 
LookAsideList table or the NonPagedPool - * - * Arguments: - * N/A - * - * Return Value: - * PKS_Tsdu: the new Tsdu or NULL if it fails - * - * Notes: - * N/A - */ - -PKS_TSDU -KsAllocateKsTsdu() -{ - PKS_TSDU KsTsdu = NULL; - - spin_lock(&(ks_data.ksnd_tsdu_lock)); - - if (!list_empty (&(ks_data.ksnd_freetsdus))) { - - LASSERT(ks_data.ksnd_nfreetsdus > 0); - - KsTsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link); - list_del(&(KsTsdu->Link)); - ks_data.ksnd_nfreetsdus--; - - } else { - - KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc( - ks_data.ksnd_tsdu_slab, 0); - } - - spin_unlock(&(ks_data.ksnd_tsdu_lock)); - - if (NULL != KsTsdu) { - KsInitializeKsTsdu(KsTsdu, ks_data.ksnd_tsdu_size); - } - - return (KsTsdu); -} - - -/* - * KsPutKsTsdu - * Move the Tsdu to the free tsdu list in ks_data. - * - * Arguments: - * KsTsdu: Tsdu to be moved. - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -VOID -KsPutKsTsdu( - PKS_TSDU KsTsdu - ) -{ - spin_lock(&(ks_data.ksnd_tsdu_lock)); - - list_add_tail( &(KsTsdu->Link), &(ks_data.ksnd_freetsdus)); - ks_data.ksnd_nfreetsdus++; - - spin_unlock(&(ks_data.ksnd_tsdu_lock)); -} - - -/* - * KsFreeKsTsdu - * Release a Tsdu: uninitialize then free it. - * - * Arguments: - * KsTsdu: Tsdu to be freed. 
- * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -VOID -KsFreeKsTsdu( - PKS_TSDU KsTsdu - ) -{ - cfs_mem_cache_free( - ks_data.ksnd_tsdu_slab, - KsTsdu ); -} - - -/* - * KsInitializeKsTsdu - * Initialize the Tsdu buffer header - * - * Arguments: - * KsTsdu: the Tsdu to be initialized - * Length: the total length of the Tsdu - * - * Return Value: - * VOID - * - * NOTES: - * N/A - */ - -VOID -KsInitializeKsTsdu( - PKS_TSDU KsTsdu, - ULONG Length - ) -{ - RtlZeroMemory(KsTsdu, Length); - KsTsdu->Magic = KS_TSDU_MAGIC; - KsTsdu->TotalLength = Length; - KsTsdu->StartOffset = KsTsdu->LastOffset = - KS_DWORD_ALIGN(sizeof(KS_TSDU)); -} - - -/* - * KsInitializeKsTsduMgr - * Initialize the management structure of - * Tsdu buffers - * - * Arguments: - * TsduMgr: the TsduMgr to be initialized - * - * Return Value: - * VOID - * - * NOTES: - * N/A - */ - -VOID -KsInitializeKsTsduMgr( - PKS_TSDUMGR TsduMgr - ) -{ - KeInitializeEvent( - &(TsduMgr->Event), - NotificationEvent, - FALSE - ); - - CFS_INIT_LIST_HEAD( - &(TsduMgr->TsduList) - ); - - TsduMgr->NumOfTsdu = 0; - TsduMgr->TotalBytes = 0; -} - - -/* - * KsInitializeKsChain - * Initialize the China structure for receiving - * or transmitting - * - * Arguments: - * KsChain: the KsChain to be initialized - * - * Return Value: - * VOID - * - * NOTES: - * N/A - */ - -VOID -KsInitializeKsChain( - PKS_CHAIN KsChain - ) -{ - KsInitializeKsTsduMgr(&(KsChain->Normal)); - KsInitializeKsTsduMgr(&(KsChain->Expedited)); -} - - -/* - * KsCleanupTsduMgr - * Clean up all the Tsdus in the TsduMgr list - * - * Arguments: - * KsTsduMgr: the Tsdu list manager - * - * Return Value: - * NTSTATUS: nt status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsCleanupTsduMgr( - PKS_TSDUMGR KsTsduMgr - ) -{ - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_MDL KsTsduMdl; - - LASSERT(NULL != KsTsduMgr); - - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); - - while (!list_empty(&KsTsduMgr->TsduList)) { - - KsTsdu = 
list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { - - // - // KsTsdu is empty now, we need free it ... - // - - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; - - KsFreeKsTsdu(KsTsdu); - - } else { - - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - - KsTsdu->StartOffset += KsTsduDat->TotalLength; - - } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) { - - ASSERT(KsTsduBuf->UserBuffer != NULL); - - if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) { - ExFreePool(KsTsduBuf->UserBuffer); - } else { - cfs_enter_debugger(); - } - - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - - } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { - - // - // MDL Tsdu Unit ... - // - - TdiReturnChainedReceives( - &(KsTsduMdl->Descriptor), - 1 ); - - KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); - } - } - } - - return STATUS_SUCCESS; -} - - -/* - * KsCleanupKsChain - * Clean up the TsduMgrs of the KsChain - * - * Arguments: - * KsChain: the chain managing TsduMgr - * - * Return Value: - * NTSTATUS: nt status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsCleanupKsChain( - PKS_CHAIN KsChain - ) -{ - NTSTATUS Status; - - LASSERT(NULL != KsChain); - - Status = KsCleanupTsduMgr( - &(KsChain->Normal) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - Status = KsCleanupTsduMgr( - &(KsChain->Expedited) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - -errorout: - - return Status; -} - - -/* - * KsCleanupTsdu - * Clean up all the Tsdus of a tdi connected object - * - * Arguments: - * tconn: the tdi connection which is connected already. 
- * - * Return Value: - * Nt status code - * - * NOTES: - * N/A - */ - -NTSTATUS -KsCleanupTsdu( - ksock_tconn_t * tconn - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - - if (tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child ) { - - goto errorout; - } - - if (tconn->kstc_type == kstt_sender) { - - Status = KsCleanupKsChain( - &(tconn->sender.kstc_recv) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - Status = KsCleanupKsChain( - &(tconn->sender.kstc_send) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - } else { - - Status = KsCleanupKsChain( - &(tconn->child.kstc_recv) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - Status = KsCleanupKsChain( - &(tconn->child.kstc_send) - ); - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - - } - -errorout: - - return (Status); -} - - -/* - * KsCopyMdlChainToMdlChain - * Copy data from a [chained] Mdl to anther [chained] Mdl. - * Tdi library does not provide this function. We have to - * realize it ourselives. - * - * Arguments: - * SourceMdlChain: the source mdl - * SourceOffset: start offset of the source - * DestinationMdlChain: the dst mdl - * DestinationOffset: the offset where data are to be copied. 
- * BytesTobecopied: the expteced bytes to be copied - * BytesCopied: to store the really copied data length - * - * Return Value: - * NTSTATUS: STATUS_SUCCESS or other error code - * - * NOTES: - * The length of source mdl must be >= SourceOffset + BytesTobecopied - */ - -NTSTATUS -KsCopyMdlChainToMdlChain( - IN PMDL SourceMdlChain, - IN ULONG SourceOffset, - IN PMDL DestinationMdlChain, - IN ULONG DestinationOffset, - IN ULONG BytesTobecopied, - OUT PULONG BytesCopied - ) -{ - PMDL SrcMdl = SourceMdlChain; - PMDL DstMdl = DestinationMdlChain; - - PUCHAR SrcBuf = NULL; - PUCHAR DstBuf = NULL; - - ULONG dwBytes = 0; - - NTSTATUS Status = STATUS_SUCCESS; - - - while (dwBytes < BytesTobecopied) { - - ULONG Length = 0; - - while (MmGetMdlByteCount(SrcMdl) <= SourceOffset) { - - SourceOffset -= MmGetMdlByteCount(SrcMdl); - - SrcMdl = SrcMdl->Next; - - if (NULL == SrcMdl) { - - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - } - - while (MmGetMdlByteCount(DstMdl) <= DestinationOffset) { - - DestinationOffset -= MmGetMdlByteCount(DstMdl); - - DstMdl = DstMdl->Next; - - if (NULL == DstMdl) { - - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - } - - DstBuf = (PUCHAR)KsMapMdlBuffer(DstMdl); - - if ((NULL == DstBuf)) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - // - // Here we need skip the OVERFLOW case via RtlCopyMemory :-( - // - - if ( KsQueryMdlsSize(SrcMdl) - SourceOffset > - MmGetMdlByteCount(DstMdl) - DestinationOffset ) { - - Length = BytesTobecopied - dwBytes; - - if (Length > KsQueryMdlsSize(SrcMdl) - SourceOffset) { - Length = KsQueryMdlsSize(SrcMdl) - SourceOffset; - } - - if (Length > MmGetMdlByteCount(DstMdl) - DestinationOffset) { - Length = MmGetMdlByteCount(DstMdl) - DestinationOffset; - } - - SrcBuf = (PUCHAR)KsMapMdlBuffer(SrcMdl); - - if ((NULL == DstBuf)) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - RtlCopyMemory( - DstBuf + DestinationOffset, - SrcBuf + SourceOffset, - Length - ); - - 
} else { - - Status = TdiCopyMdlToBuffer( - SrcMdl, - SourceOffset, - DstBuf, - DestinationOffset, - MmGetMdlByteCount(DstMdl), - &Length - ); - - if (STATUS_BUFFER_OVERFLOW == Status) { - cfs_enter_debugger(); - } else if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - } - - SourceOffset += Length; - DestinationOffset += Length; - dwBytes += Length; - } - -errorout: - - if (NT_SUCCESS(Status)) { - *BytesCopied = dwBytes; - } else { - *BytesCopied = 0; - } - - return Status; -} - - - -/* - * KsQueryMdlSize - * Query the whole size of a MDL (may be chained) - * - * Arguments: - * Mdl: the Mdl to be queried - * - * Return Value: - * ULONG: the total size of the mdl - * - * NOTES: - * N/A - */ - -ULONG -KsQueryMdlsSize (PMDL Mdl) -{ - PMDL Next = Mdl; - ULONG Length = 0; - - - // - // Walking the MDL Chain ... - // - - while (Next) { - Length += MmGetMdlByteCount(Next); - Next = Next->Next; - } - - return (Length); -} - - -/* - * KsLockUserBuffer - * Allocate MDL for the buffer and lock the pages into - * nonpaged pool - * - * Arguments: - * UserBuffer: the user buffer to be locked - * Length: length in bytes of the buffer - * Operation: read or write access - * pMdl: the result of the created mdl - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * NOTES: - * N/A - */ - -NTSTATUS -KsLockUserBuffer ( - IN PVOID UserBuffer, - IN BOOLEAN bPaged, - IN ULONG Length, - IN LOCK_OPERATION Operation, - OUT PMDL * pMdl - ) -{ - NTSTATUS Status; - PMDL Mdl = NULL; - - LASSERT(UserBuffer != NULL); - - *pMdl = NULL; - - Mdl = IoAllocateMdl( - UserBuffer, - Length, - FALSE, - FALSE, - NULL - ); - - if (Mdl == NULL) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - __try { - - if (bPaged) { - MmProbeAndLockPages( - Mdl, - KernelMode, - Operation - ); - } else { - MmBuildMdlForNonPagedPool( - Mdl - ); - } - - Status = STATUS_SUCCESS; - - *pMdl = Mdl; - - } __except (EXCEPTION_EXECUTE_HANDLER) { - 
- IoFreeMdl(Mdl); - - Mdl = NULL; - - cfs_enter_debugger(); - - Status = STATUS_INVALID_USER_BUFFER; - } - } - - return Status; -} - -/* - * KsMapMdlBuffer - * Map the mdl into a buffer in kernel space - * - * Arguments: - * Mdl: the mdl to be mapped - * - * Return Value: - * PVOID: the buffer mapped or NULL in failure - * - * NOTES: - * N/A - */ - -PVOID -KsMapMdlBuffer (PMDL Mdl) -{ - LASSERT(Mdl != NULL); - - return MmGetSystemAddressForMdlSafe( - Mdl, - NormalPagePriority - ); -} - - -/* - * KsReleaseMdl - * Unlock all the pages in the mdl - * - * Arguments: - * Mdl: memory description list to be released - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -VOID -KsReleaseMdl (IN PMDL Mdl, - IN int Paged ) -{ - LASSERT(Mdl != NULL); - - while (Mdl) { - - PMDL Next; - - Next = Mdl->Next; - - if (Paged) { - MmUnlockPages(Mdl); - } - - IoFreeMdl(Mdl); - - Mdl = Next; - } -} - - -/* - * ks_lock_buffer - * allocate MDL for the user spepcified buffer and lock (paging-in) - * all the pages of the buffer into system memory - * - * Arguments: - * buffer: the user buffer to be locked - * length: length in bytes of the buffer - * access: read or write access - * mdl: the result of the created mdl - * - * Return Value: - * int: the ks error code: 0: success / -x: failture - * - * Notes: - * N/A - */ - -int -ks_lock_buffer ( - void * buffer, - int paged, - int length, - LOCK_OPERATION access, - ksock_mdl_t ** kmdl - ) -{ - NTSTATUS status; - - status = KsLockUserBuffer( - buffer, - paged !=0, - length, - access, - kmdl - ); - - return cfs_error_code(status); -} - - -/* - * ks_map_mdl - * Map the mdl pages into kernel space - * - * Arguments: - * mdl: the mdl to be mapped - * - * Return Value: - * void *: the buffer mapped or NULL in failure - * - * Notes: - * N/A - */ - -void * -ks_map_mdl (ksock_mdl_t * mdl) -{ - LASSERT(mdl != NULL); - - return KsMapMdlBuffer(mdl); -} - -/* - * ks_release_mdl - * Unlock all the pages in the mdl and release the mdl - * - * 
Arguments: - * mdl: memory description list to be released - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_release_mdl (ksock_mdl_t *mdl, int paged) -{ - LASSERT(mdl != NULL); - - KsReleaseMdl(mdl, paged); -} - - -/* - * ks_create_tconn - * allocate a new tconn structure from the SLAB cache or - * NonPaged sysetm pool - * - * Arguments: - * N/A - * - * Return Value: - * ksock_tconn_t *: the address of tconn or NULL if it fails - * - * NOTES: - * N/A - */ - -ksock_tconn_t * -ks_create_tconn() -{ - ksock_tconn_t * tconn = NULL; - - /* allocate ksoc_tconn_t from the slab cache memory */ - - tconn = (ksock_tconn_t *)cfs_mem_cache_alloc( - ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO); - - if (tconn) { - - /* zero tconn elements */ - memset(tconn, 0, sizeof(ksock_tconn_t)); - - /* initialize the tconn ... */ - tconn->kstc_magic = KS_TCONN_MAGIC; - - ExInitializeWorkItem( - &(tconn->kstc_disconnect.WorkItem), - KsDisconnectHelper, - &(tconn->kstc_disconnect) - ); - - KeInitializeEvent( - &(tconn->kstc_disconnect.Event), - SynchronizationEvent, - FALSE ); - - ExInitializeWorkItem( - &(tconn->kstc_destroy), - ks_destroy_tconn, - tconn - ); - - spin_lock_init(&(tconn->kstc_lock)); - - ks_get_tconn(tconn); - - spin_lock(&(ks_data.ksnd_tconn_lock)); - - /* attach it into global list in ks_data */ - - list_add(&(tconn->kstc_list), &(ks_data.ksnd_tconns)); - ks_data.ksnd_ntconns++; - spin_unlock(&(ks_data.ksnd_tconn_lock)); - - tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000; - } - - return (tconn); -} - - -/* - * ks_free_tconn - * free the tconn structure to the SLAB cache or NonPaged - * sysetm pool - * - * Arguments: - * tconn: the tcon is to be freed - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_free_tconn(ksock_tconn_t * tconn) -{ - LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0); - - spin_lock(&(ks_data.ksnd_tconn_lock)); - - /* remove it from the global list */ - list_del(&tconn->kstc_list); - ks_data.ksnd_ntconns--; - - /* 
if this is the last tconn, it would be safe for - ks_tdi_fini_data to quit ... */ - if (ks_data.ksnd_ntconns == 0) { - cfs_wake_event(&ks_data.ksnd_tconn_exit); - } - spin_unlock(&(ks_data.ksnd_tconn_lock)); - - /* free the structure memory */ - cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn); -} - - -/* - * ks_init_listener - * Initialize the tconn as a listener (daemon) - * - * Arguments: - * tconn: the listener tconn - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_init_listener( - ksock_tconn_t * tconn - ) -{ - /* preparation: intialize the tconn members */ - - tconn->kstc_type = kstt_listener; - - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - - CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list)); - CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list)); - - cfs_init_event( &(tconn->listener.kstc_accept_event), - TRUE, - FALSE ); - - cfs_init_event( &(tconn->listener.kstc_destroy_event), - TRUE, - FALSE ); - - tconn->kstc_state = ksts_inited; -} - - -/* - * ks_init_sender - * Initialize the tconn as a sender - * - * Arguments: - * tconn: the sender tconn - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_init_sender( - ksock_tconn_t * tconn - ) -{ - tconn->kstc_type = kstt_sender; - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - - KsInitializeKsChain(&(tconn->sender.kstc_recv)); - KsInitializeKsChain(&(tconn->sender.kstc_send)); - - tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - - tconn->kstc_state = ksts_inited; -} - -/* - * ks_init_child - * Initialize the tconn as a child - * - * Arguments: - * tconn: the child tconn - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -void -ks_init_child( - ksock_tconn_t * tconn - ) -{ - tconn->kstc_type = kstt_child; - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - - KsInitializeKsChain(&(tconn->child.kstc_recv)); - KsInitializeKsChain(&(tconn->child.kstc_send)); - 
- tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - - tconn->kstc_state = ksts_inited; -} - -/* - * ks_get_tconn - * increase the reference count of the tconn with 1 - * - * Arguments: - * tconn: the tdi connection to be referred - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -void -ks_get_tconn( - ksock_tconn_t * tconn - ) -{ - atomic_inc(&(tconn->kstc_refcount)); -} - -/* - * ks_put_tconn - * decrease the reference count of the tconn and destroy - * it if the refercount becomes 0. - * - * Arguments: - * tconn: the tdi connection to be dereferred - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -void -ks_put_tconn( - ksock_tconn_t *tconn - ) -{ - if (atomic_dec_and_test(&(tconn->kstc_refcount))) { - - spin_lock(&(tconn->kstc_lock)); - - if ( ( tconn->kstc_type == kstt_child || - tconn->kstc_type == kstt_sender ) && - ( tconn->kstc_state == ksts_connected ) ) { - - spin_unlock(&(tconn->kstc_lock)); - - ks_abort_tconn(tconn); - - } else { - - if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) { - cfs_enter_debugger(); - } else { - ExQueueWorkItem( - &(tconn->kstc_destroy), - DelayedWorkQueue - ); - - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY); - } - - spin_unlock(&(tconn->kstc_lock)); - } - } -} - -/* - * ks_destroy_tconn - * cleanup the tdi connection and free it - * - * Arguments: - * tconn: the tdi connection to be cleaned. 
- * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ - -void -ks_destroy_tconn( - ksock_tconn_t * tconn - ) -{ - LASSERT(tconn->kstc_refcount.counter == 0); - - if (tconn->kstc_type == kstt_listener) { - - ks_reset_handlers(tconn); - - /* for listener, we just need to close the address object */ - KsCloseAddress( - tconn->kstc_addr.Handle, - tconn->kstc_addr.FileObject - ); - - tconn->kstc_state = ksts_inited; - - } else if (tconn->kstc_type == kstt_child) { - - /* for child tdi conections */ - - /* disassociate the relation between it's connection object - and the address object */ - - if (tconn->kstc_state == ksts_associated) { - KsDisassociateAddress( - tconn->child.kstc_info.FileObject - ); - } - - /* release the connection object */ - - KsCloseConnection( - tconn->child.kstc_info.Handle, - tconn->child.kstc_info.FileObject - ); - - /* release it's refer of it's parent's address object */ - KsCloseAddress( - NULL, - tconn->kstc_addr.FileObject - ); - - spin_lock(&tconn->child.kstc_parent->kstc_lock); - spin_lock(&tconn->kstc_lock); - - tconn->kstc_state = ksts_inited; - - /* remove it frome it's parent's queues */ - - if (tconn->child.kstc_queued) { - - list_del(&(tconn->child.kstc_link)); - - if (tconn->child.kstc_queueno) { - - LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0); - tconn->child.kstc_parent->listener.kstc_accepted.num -= 1; - - } else { - - LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0); - tconn->child.kstc_parent->listener.kstc_listening.num -= 1; - } - - tconn->child.kstc_queued = FALSE; - } - - spin_unlock(&tconn->kstc_lock); - spin_unlock(&tconn->child.kstc_parent->kstc_lock); - - /* drop the reference of the parent tconn */ - ks_put_tconn(tconn->child.kstc_parent); - - } else if (tconn->kstc_type == kstt_sender) { - - ks_reset_handlers(tconn); - - /* release the connection object */ - - KsCloseConnection( - tconn->sender.kstc_info.Handle, - tconn->sender.kstc_info.FileObject - ); - - /* release it's 
refer of it's parent's address object */ - KsCloseAddress( - tconn->kstc_addr.Handle, - tconn->kstc_addr.FileObject - ); - - tconn->kstc_state = ksts_inited; - - } else { - cfs_enter_debugger(); - } - - /* free the tconn structure ... */ - - ks_free_tconn(tconn); -} - -int -ks_query_data( - ksock_tconn_t * tconn, - size_t * size, - int bIsExpedited ) -{ - int rc = 0; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - - *size = 0; - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - if ( tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child) { - rc = -EINVAL; - spin_unlock(&(tconn->kstc_lock)); - goto errorout; - } - - if (tconn->kstc_state != ksts_connected) { - rc = -ENOTCONN; - spin_unlock(&(tconn->kstc_lock)); - goto errorout; - } - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - - *size = KsTsduMgr->TotalBytes; - spin_unlock(&(tconn->kstc_lock)); - -errorout: - - ks_put_tconn(tconn); - - return (rc); -} - -/* - * ks_get_tcp_option - * Query the the options of the tcp stream connnection - * - * Arguments: - * tconn: the tdi connection - * ID: option id - * OptionValue: buffer to store the option value - * Length: the length of the value, to be returned - * - * Return Value: - * int: ks return code - * - * NOTES: - * N/A - */ - -int -ks_get_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - PULONG Length - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - IO_STATUS_BLOCK IoStatus; - - TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx; - - PFILE_OBJECT ConnectionObject; - PDEVICE_OBJECT DeviceObject = NULL; - - PIRP Irp = NULL; - PIO_STACK_LOCATION IrpSp = NULL; - - KEVENT Event; - - /* make sure the tdi connection is connected ? 
*/ - - ks_get_tconn(tconn); - - if (tconn->kstc_state != ksts_connected) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - LASSERT(tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); - - if (tconn->kstc_type == kstt_sender) { - ConnectionObject = tconn->sender.kstc_info.FileObject; - } else { - ConnectionObject = tconn->child.kstc_info.FileObject; - } - - QueryInfoEx.ID.toi_id = ID; - QueryInfoEx.ID.toi_type = INFO_TYPE_CONNECTION; - QueryInfoEx.ID.toi_class = INFO_CLASS_PROTOCOL; - QueryInfoEx.ID.toi_entity.tei_entity = CO_TL_ENTITY; - QueryInfoEx.ID.toi_entity.tei_instance = 0; - - RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE); - - KeInitializeEvent(&Event, NotificationEvent, FALSE); - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - Irp = IoBuildDeviceIoControlRequest( - IOCTL_TCP_QUERY_INFORMATION_EX, - DeviceObject, - &QueryInfoEx, - sizeof(TCP_REQUEST_QUERY_INFORMATION_EX), - OptionValue, - *Length, - FALSE, - &Event, - &IoStatus - ); - - if (Irp == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - IrpSp = IoGetNextIrpStackLocation(Irp); - - if (IrpSp == NULL) { - - IoFreeIrp(Irp); - Irp = NULL; - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - IrpSp->FileObject = ConnectionObject; - IrpSp->DeviceObject = DeviceObject; - - Status = IoCallDriver(DeviceObject, Irp); - - if (Status == STATUS_PENDING) { - - KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - Status = IoStatus.Status; - } - - - if (NT_SUCCESS(Status)) { - *Length = IoStatus.Information; - } else { - cfs_enter_debugger(); - memset(OptionValue, 0, *Length); - Status = STATUS_SUCCESS; - } - -errorout: - - ks_put_tconn(tconn); - - return cfs_error_code(Status); -} - -/* - * ks_set_tcp_option - * Set the the options for the tcp stream connnection - * - * Arguments: - * tconn: the tdi connection - * ID: option id - * OptionValue: buffer containing the new option value - * 
Length: the length of the value - * - * Return Value: - * int: ks return code - * - * NOTES: - * N/A - */ - -NTSTATUS -ks_set_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - ULONG Length - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - - IO_STATUS_BLOCK IoStatus; - - ULONG SetInfoExLength; - PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL; - - PFILE_OBJECT ConnectionObject; - PDEVICE_OBJECT DeviceObject = NULL; - - PIRP Irp = NULL; - PIO_STACK_LOCATION IrpSp = NULL; - - PKEVENT Event; - - /* make sure the tdi connection is connected ? */ - - ks_get_tconn(tconn); - - if (tconn->kstc_state != ksts_connected) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - LASSERT(tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); - - if (tconn->kstc_type == kstt_sender) { - ConnectionObject = tconn->sender.kstc_info.FileObject; - } else { - ConnectionObject = tconn->child.kstc_info.FileObject; - } - - SetInfoExLength = sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT); - - SetInfoEx = ExAllocatePoolWithTag( - NonPagedPool, - SetInfoExLength, - 'TSSK' - ); - - if (SetInfoEx == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - SetInfoEx->ID.toi_id = ID; - - SetInfoEx->ID.toi_type = INFO_TYPE_CONNECTION; - SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL; - SetInfoEx->ID.toi_entity.tei_entity = CO_TL_ENTITY; - SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE; - - SetInfoEx->BufferSize = Length; - RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length); - - Event = (PKEVENT)(&(SetInfoEx->Buffer[Length])); - KeInitializeEvent(Event, NotificationEvent, FALSE); - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - Irp = IoBuildDeviceIoControlRequest( - IOCTL_TCP_SET_INFORMATION_EX, - DeviceObject, - SetInfoEx, - SetInfoExLength, - NULL, - 0, - FALSE, - Event, - &IoStatus - ); - - if (Irp == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - IrpSp = 
IoGetNextIrpStackLocation(Irp); - - if (IrpSp == NULL) { - IoFreeIrp(Irp); - Irp = NULL; - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - IrpSp->FileObject = ConnectionObject; - IrpSp->DeviceObject = DeviceObject; - - Status = IoCallDriver(DeviceObject, Irp); - - if (Status == STATUS_PENDING) { - - KeWaitForSingleObject( - Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - Status = IoStatus.Status; - } - -errorout: - - if (SetInfoEx) { - ExFreePool(SetInfoEx); - } - - if (!NT_SUCCESS(Status)) { - printk("ks_set_tcp_option: error setup tcp option: ID (%d), Status = %xh\n", - ID, Status); - Status = STATUS_SUCCESS; - } - - ks_put_tconn(tconn); - - return cfs_error_code(Status); -} - -/* - * ks_bind_tconn - * bind the tdi connection object with an address - * - * Arguments: - * tconn: tconn to be bound - * parent: the parent tconn object - * ipaddr: the ip address - * port: the port number - * - * Return Value: - * int: 0 for success or ks error codes. - * - * NOTES: - * N/A - */ - -int -ks_bind_tconn ( - ksock_tconn_t * tconn, - ksock_tconn_t * parent, - ulong_ptr addr, - unsigned short port - ) -{ - NTSTATUS status; - int rc = 0; - - ksock_tdi_addr_t taddr; - - memset(&taddr, 0, sizeof(ksock_tdi_addr_t)); - - if (tconn->kstc_state != ksts_inited) { - - status = STATUS_INVALID_PARAMETER; - rc = cfs_error_code(status); - - goto errorout; - - } else if (tconn->kstc_type == kstt_child) { - - if (NULL == parent) { - status = STATUS_INVALID_PARAMETER; - rc = cfs_error_code(status); - - goto errorout; - } - - /* refer it's parent's address object */ - - taddr = parent->kstc_addr; - ObReferenceObject(taddr.FileObject); - - ks_get_tconn(parent); - - } else { - - PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi); - ULONG AddrLen = 0; - - /* intialize the tdi address*/ - - TdiAddress->TAAddressCount = 1; - TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; - TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; - - 
((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr); - - memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); - - - /* open the transport address object */ - - AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + - TDI_ADDRESS_LENGTH_IP; - - status = KsOpenAddress( - &(tconn->kstc_dev), - &(taddr.Tdi), - AddrLen, - &(taddr.Handle), - &(taddr.FileObject) - ); - - if (!NT_SUCCESS(status)) { - - KsPrint((0, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n", - addr, port, status )); - rc = cfs_error_code(status); - goto errorout; - } - } - - if (tconn->kstc_type == kstt_child) { - tconn->child.kstc_parent = parent; - } - - tconn->kstc_state = ksts_bind; - tconn->kstc_addr = taddr; - -errorout: - - return (rc); -} - -/* - * ks_build_tconn - * build tcp/streaming connection to remote peer - * - * Arguments: - * tconn: tconn to be connected to the peer - * addr: the peer's ip address - * port: the peer's port number - * - * Return Value: - * int: 0 for success or ks error codes. 
- * - * Notes: - * N/A - */ - -int -ks_build_tconn( - ksock_tconn_t * tconn, - ulong_ptr addr, - unsigned short port - ) -{ - int rc = 0; - NTSTATUS status = STATUS_SUCCESS; - - - PFILE_OBJECT ConnectionObject = NULL; - PDEVICE_OBJECT DeviceObject = NULL; - - PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; - ULONG AddrLength; - - PIRP Irp = NULL; - - LASSERT(tconn->kstc_type == kstt_sender); - LASSERT(tconn->kstc_state == ksts_bind); - - ks_get_tconn(tconn); - - { - /* set the event callbacks */ - rc = ks_set_handlers(tconn); - - if (rc < 0) { - cfs_enter_debugger(); - goto errorout; - } - } - - /* create the connection file handle / object */ - status = KsOpenConnection( - &(tconn->kstc_dev), - (CONNECTION_CONTEXT)tconn, - &(tconn->sender.kstc_info.Handle), - &(tconn->sender.kstc_info.FileObject) - ); - - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - /* associdate the the connection with the adress object of the tconn */ - - status = KsAssociateAddress( - tconn->kstc_addr.Handle, - tconn->sender.kstc_info.FileObject - ); - - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - tconn->kstc_state = ksts_associated; - - /* Allocating Connection Info Together with the Address */ - AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) - + TDI_ADDRESS_LENGTH_IP; - - ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( - NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK'); - - if (NULL == ConnectionInfo) { - - status = STATUS_INSUFFICIENT_RESOURCES; - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - /* Initializing ConnectionInfo ... 
*/ - { - PTRANSPORT_ADDRESS TdiAddress; - - /* ConnectionInfo settings */ - - ConnectionInfo->UserDataLength = 0; - ConnectionInfo->UserData = NULL; - ConnectionInfo->OptionsLength = 0; - ConnectionInfo->Options = NULL; - ConnectionInfo->RemoteAddressLength = AddrLength; - ConnectionInfo->RemoteAddress = ConnectionInfo + 1; - - - /* intialize the tdi address*/ - - TdiAddress = ConnectionInfo->RemoteAddress; - - TdiAddress->TAAddressCount = 1; - TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; - TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; - - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr); - - memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); - } - - /* Now prepare to connect the remote peer ... */ - - ConnectionObject = tconn->sender.kstc_info.FileObject; - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - /* allocate a new Irp */ - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - status = STATUS_INSUFFICIENT_RESOURCES; - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - /* setup the Irp */ - - TdiBuildConnect( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - NULL, - ConnectionInfo, - NULL - ); - - - /* sumbit the Irp to the underlying transport driver */ - status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - NULL - ); - - spin_lock(&(tconn->kstc_lock)); - - if (NT_SUCCESS(status)) { - - /* Connected! the conneciton is built successfully. */ - - tconn->kstc_state = ksts_connected; - - tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo; - tconn->sender.kstc_info.Remote = ConnectionInfo->RemoteAddress; - - spin_unlock(&(tconn->kstc_lock)); - - } else { - - /* Not connected! Abort it ... 
*/ - - if (rc != 0) { - cfs_enter_debugger(); - } - - Irp = NULL; - rc = cfs_error_code(status); - - tconn->kstc_state = ksts_associated; - spin_unlock(&(tconn->kstc_lock)); - - /* disassocidate the connection and the address object, - after cleanup, it's safe to set the state to abort ... */ - - if ( NT_SUCCESS(KsDisassociateAddress( - tconn->sender.kstc_info.FileObject))) { - tconn->kstc_state = ksts_aborted; - } - - /* reset the event callbacks */ - rc = ks_reset_handlers(tconn); - - goto errorout; - } - -errorout: - - if (NT_SUCCESS(status)) { - - ks_query_local_ipaddr(tconn); - - } else { - - if (ConnectionInfo) { - ExFreePool(ConnectionInfo); - } - if (Irp) { - IoFreeIrp(Irp); - } - } - - ks_put_tconn(tconn); - - return (rc); -} - - -/* - * ks_disconnect_tconn - * disconnect the tconn from a connection - * - * Arguments: - * tconn: the tdi connecton object connected already - * flags: flags & options for disconnecting - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ - -int -ks_disconnect_tconn( - ksock_tconn_t * tconn, - ulong_ptr flags - ) -{ - NTSTATUS status = STATUS_SUCCESS; - - ksock_tconn_info_t * info; - - PFILE_OBJECT ConnectionObject; - PDEVICE_OBJECT DeviceObject = NULL; - - PIRP Irp = NULL; - - KEVENT Event; - - ks_get_tconn(tconn); - - /* make sure tt's connected already and it - must be a sender or a child ... */ - - LASSERT(tconn->kstc_state == ksts_connected); - LASSERT( tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); - - /* reset all the event handlers to NULL */ - - if (tconn->kstc_type != kstt_child) { - ks_reset_handlers (tconn); - } - - /* Disconnecting to the remote peer ... 
*/ - - if (tconn->kstc_type == kstt_sender) { - info = &(tconn->sender.kstc_info); - } else { - info = &(tconn->child.kstc_info); - } - - ConnectionObject = info->FileObject; - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - /* allocate an Irp and setup it */ - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); - goto errorout; - } - - KeInitializeEvent( - &Event, - SynchronizationEvent, - FALSE - ); - - TdiBuildDisconnect( - Irp, - DeviceObject, - ConnectionObject, - KsDisconectCompletionRoutine, - &Event, - NULL, - flags, - NULL, - NULL - ); - - /* issue the Irp to the underlying transport - driver to disconnect the connection */ - - status = IoCallDriver(DeviceObject, Irp); - - if (STATUS_PENDING == status) { - - status = KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - status = Irp->IoStatus.Status; - } - - KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n", - status, KsNtStatusToString(status))); - - IoFreeIrp(Irp); - - if (info->ConnectionInfo) { - - /* disassociate the association between connection/address objects */ - - status = KsDisassociateAddress(ConnectionObject); - - if (!NT_SUCCESS(status)) { - cfs_enter_debugger(); - } - - spin_lock(&(tconn->kstc_lock)); - - /* cleanup the tsdumgr Lists */ - KsCleanupTsdu (tconn); - - /* set the state of the tconn */ - if (NT_SUCCESS(status)) { - tconn->kstc_state = ksts_disconnected; - } else { - tconn->kstc_state = ksts_associated; - } - - /* free the connection info to system pool*/ - ExFreePool(info->ConnectionInfo); - info->ConnectionInfo = NULL; - info->Remote = NULL; - - spin_unlock(&(tconn->kstc_lock)); - } - - status = STATUS_SUCCESS; - -errorout: - - ks_put_tconn(tconn); - - return cfs_error_code(status); -} - - -/* - * ks_abort_tconn - * The connection is broken un-expectedly. We need do - * some cleanup. 
- * - * Arguments: - * tconn: the tdi connection - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_abort_tconn( - ksock_tconn_t * tconn - ) -{ - PKS_DISCONNECT_WORKITEM WorkItem = NULL; - - WorkItem = &(tconn->kstc_disconnect); - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - if (tconn->kstc_state != ksts_connected) { - ks_put_tconn(tconn); - } else { - - if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { - - WorkItem->Flags = TDI_DISCONNECT_ABORT; - WorkItem->tconn = tconn; - - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - - ExQueueWorkItem( - &(WorkItem->WorkItem), - DelayedWorkQueue - ); - } - } - - spin_unlock(&(tconn->kstc_lock)); -} - - -/* - * ks_query_local_ipaddr - * query the local connection ip address - * - * Arguments: - * tconn: the tconn which is connected - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ - -int -ks_query_local_ipaddr( - ksock_tconn_t * tconn - ) -{ - PFILE_OBJECT FileObject = NULL; - NTSTATUS status; - - PTRANSPORT_ADDRESS TdiAddress; - ULONG AddressLength; - - if (tconn->kstc_type == kstt_sender) { - FileObject = tconn->sender.kstc_info.FileObject; - } else if (tconn->kstc_type == kstt_child) { - FileObject = tconn->child.kstc_info.FileObject; - } else { - status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - TdiAddress = &(tconn->kstc_addr.Tdi); - AddressLength = MAX_ADDRESS_LENGTH; - - status = KsQueryIpAddress(FileObject, TdiAddress, &AddressLength); - - if (NT_SUCCESS(status)) { - - KsPrint((0, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n", - ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr, - ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port )); - } else { - KsPrint((0, "KsQueryonnectionIpAddress: Failed to query the connection local ip address.\n")); - } - -errorout: - - return cfs_error_code(status); -} - -/* - * ks_send_mdl - * send MDL chain to the peer for a stream connection - * - * 
Arguments: - * tconn: tdi connection object - * tx: the transmit context - * mdl: the mdl chain containing the data - * len: length of the data - * flags: flags of the transmission - * - * Return Value: - * ks return code - * - * Notes: - * N/A - */ - -int -ks_send_mdl( - ksock_tconn_t * tconn, - void * tx, - ksock_mdl_t * mdl, - int len, - int flags - ) -{ - NTSTATUS Status; - int rc = 0; - ulong_ptr length; - ulong_ptr tflags; - ksock_tdi_tx_t * context; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_DAT KsTsduDat; - - BOOLEAN bNewTsdu = FALSE; /* newly allocated */ - BOOLEAN bNewBuff = FALSE; /* newly allocated */ - - BOOLEAN bBuffed; /* bufferred sending */ - - PUCHAR Buffer = NULL; - ksock_mdl_t * NewMdl = NULL; - - PIRP Irp = NULL; - PFILE_OBJECT ConnObject; - PDEVICE_OBJECT DeviceObject; - - BOOLEAN bIsNonBlock; - - ks_get_tconn(tconn); - - tflags = ks_tdi_send_flags(flags); - bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT); - - spin_lock(&tconn->kstc_lock); - - LASSERT( tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child ); - - if (tconn->kstc_state != ksts_connected) { - spin_unlock(&tconn->kstc_lock); - ks_put_tconn(tconn); - return -ENOTCONN; - } - - /* get the latest Tsdu buffer form TsduMgr list. - just set NULL if the list is empty. 
*/ - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_send); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_send); - } - - if (cfs_is_flag_set(tflags, TDI_SEND_EXPEDITED)) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - - if (KsTsduMgr->TotalBytes + len <= tconn->kstc_snd_wnd) { - bBuffed = TRUE; - } else { - bBuffed = FALSE; - } - - /* do the preparation work for bufferred sending */ - - if (bBuffed) { - - /* if the data is even larger than the biggest Tsdu, we have - to allocate new buffer and use TSDU_TYOE_BUF to store it */ - - if ( KS_TSDU_STRU_SIZE((ULONG)len) > ks_data.ksnd_tsdu_size - - KS_DWORD_ALIGN(sizeof(KS_TSDU))) { - bNewBuff = TRUE; - } - - if (list_empty(&(KsTsduMgr->TsduList))) { - - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; - - } else { - - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - - /* check whether KsTsdu free space is enough, or we need alloc new Tsdu */ - if (bNewBuff) { - if (sizeof(KS_TSDU_BUF) + KsTsdu->LastOffset > KsTsdu->TotalLength) { - KsTsdu = NULL; - } - } else { - if ( KS_TSDU_STRU_SIZE((ULONG)len) > - KsTsdu->TotalLength - KsTsdu->LastOffset ) { - KsTsdu = NULL; - } - } - } - - /* if there's no Tsdu or the free size is not enough for the - KS_TSDU_BUF or KS_TSDU_DAT. We need re-allocate a new Tsdu. 
*/ - - if (NULL == KsTsdu) { - - KsTsdu = KsAllocateKsTsdu(); - - if (NULL == KsTsdu) { - bBuffed = FALSE; - bNewBuff = FALSE; - } else { - bNewTsdu = TRUE; - } - } - - /* process the case that a new buffer is to be allocated from system memory */ - if (bNewBuff) { - - /* now allocating internal buffer to contain the payload */ - Buffer = ExAllocatePool(NonPagedPool, len); - - if (NULL == Buffer) { - bBuffed = FALSE; - } - } - } - - if (bBuffed) { - - if (bNewBuff) { - - /* queue a new KS_TSDU_BUF to the Tsdu buffer */ - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - KsTsduBuf->TsduFlags = 0; - KsTsduBuf->DataLength = (ULONG)len; - KsTsduBuf->StartOffset = 0; - KsTsduBuf->UserBuffer = Buffer; - } else { - /* queue a new KS_TSDU_BUF to the Tsdu buffer */ - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - KsTsduDat->TsduFlags = 0; - KsTsduDat->DataLength = (ULONG)len; - KsTsduDat->StartOffset = 0; - KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE((ULONG)len); - - Buffer = &KsTsduDat->Data[0]; - } - - /* now locking the Buffer and copy user payload into the buffer */ - ASSERT(Buffer != NULL); - - rc = ks_lock_buffer(Buffer, FALSE, len, IoReadAccess, &NewMdl); - if (rc != 0) { - printk("ks_send_mdl: bufferred: error allocating mdl.\n"); - bBuffed = FALSE; - } else { - ULONG BytesCopied = 0; - TdiCopyMdlToBuffer(mdl, 0, Buffer, 0, (ULONG)len, &BytesCopied); - if (BytesCopied != (ULONG) len) { - bBuffed = FALSE; - } - } - - /* Do the finializing job if we succeed to to lock the buffer and move - user data. Or we need do cleaning up ... */ - if (bBuffed) { - - if (bNewBuff) { - KsTsduBuf->TsduType = TSDU_TYPE_BUF; - KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); - - } else { - KsTsduDat->TsduType = TSDU_TYPE_DAT; - KsTsdu->LastOffset += KsTsduDat->TotalLength; - } - - /* attach it to the TsduMgr list if the Tsdu is newly created. 
*/ - if (bNewTsdu) { - - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } - - } else { - - if (NewMdl) { - ks_release_mdl(NewMdl, FALSE); - NewMdl = NULL; - } - - if (bNewBuff) { - ExFreePool(Buffer); - Buffer = NULL; - bNewBuff = FALSE; - } - } - } - - /* update the TotalBytes being in sending */ - KsTsduMgr->TotalBytes += (ULONG)len; - - spin_unlock(&tconn->kstc_lock); - - /* cleanup the Tsdu if not successful */ - if (!bBuffed && bNewTsdu) { - KsPutKsTsdu(KsTsdu); - bNewTsdu = FALSE; - KsTsdu = NULL; - } - - /* we need allocate the ksock_tx_t structure from memory pool. */ - - context = cfs_alloc(sizeof(ksock_tdi_tx_t) + sizeof(KEVENT),0); - if (!context) { - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - /* intialize the TcpContext */ - - memset(context,0, sizeof(ksock_tdi_tx_t) + sizeof(KEVENT)); - - context->tconn = tconn; - context->Event = (PKEVENT) ((PUCHAR)context + sizeof(ksock_tdi_tx_t)); - - KeInitializeEvent(context->Event, SynchronizationEvent, FALSE); - - if (bBuffed) { - - /* for bufferred transmission, we need set - the internal completion routine. */ - - context->CompletionRoutine = KsTcpSendCompletionRoutine; - context->KsTsduMgr = KsTsduMgr; - context->CompletionContext = KsTsdu; - context->CompletionContext2 = (bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat); - context->bCounted = FALSE; - - } else if (bIsNonBlock) { - - /* for non-blocking transmission, we need set - the internal completion routine too. 
*/ - - context->CompletionRoutine = KsTcpSendCompletionRoutine; - context->CompletionContext = tx; - context->KsTsduMgr = KsTsduMgr; - context->bCounted = TRUE; - context->ReferCount = 2; - } - - if (tconn->kstc_type == kstt_sender) { - ConnObject = tconn->sender.kstc_info.FileObject; - } else { - LASSERT(tconn->kstc_type == kstt_child); - ConnObject = tconn->child.kstc_info.FileObject; - } - - DeviceObject = IoGetRelatedDeviceObject(ConnObject); - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); - - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } - - length = KsQueryMdlsSize(mdl); - - LASSERT((ULONG)len <= length); - - ks_get_tconn(tconn); - - TdiBuildSend( - Irp, - DeviceObject, - ConnObject, - KsTcpCompletionRoutine, - context, - (bBuffed ? NewMdl : mdl), - (bBuffed ? (tflags | TDI_SEND_NON_BLOCKING) : tflags), - (ULONG)len; - ); - - Status = IoCallDriver(DeviceObject, Irp); - - if (bBuffed) { - ks_release_mdl(mdl, FALSE); - NewMdl = NULL; - } - - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - rc = cfs_error_code(Status); - goto errorout; - } - - if (bBuffed) { - Status = STATUS_SUCCESS; - rc = len; - context = NULL; - } else { - if (bIsNonBlock) { - if (InterlockedDecrement(&context->ReferCount) == 0) { - Status = Irp->IoStatus.Status; - } else { - Status = STATUS_PENDING; - context = NULL; - } - } else { - if (STATUS_PENDING == Status) { - Status = KeWaitForSingleObject( - context->Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - if (NT_SUCCESS(Status)) { - Status = Irp->IoStatus.Status; - } - } - } - - if (Status == STATUS_SUCCESS) { - rc = (int)(Irp->IoStatus.Information); - - spin_lock(&tconn->kstc_lock); - KsTsduMgr->TotalBytes -= rc; - spin_unlock(&tconn->kstc_lock); - - } else { - rc = cfs_error_code(Status); - } - } - -errorout: - - if (bBuffed) { - - if (NewMdl) { - ks_release_mdl(NewMdl, FALSE); - NewMdl = NULL; - } - - if (bNewBuff) { - if 
(!NT_SUCCESS(Status)) { - ExFreePool(Buffer); - Buffer = NULL; - } - } - - } else { - - if (Status != STATUS_PENDING) { - - if (Irp) { - - /* Freeing the Irp ... */ - - IoFreeIrp(Irp); - Irp = NULL; - } - } - } - - if (!NT_SUCCESS(Status)) { - - spin_lock(&tconn->kstc_lock); - - KsTsduMgr->TotalBytes -= (ULONG)len; - - if (bBuffed) { - - /* attach it to the TsduMgr list if the Tsdu is newly created. */ - if (bNewTsdu) { - - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; - - KsPutKsTsdu(KsTsdu); - } else { - if (bNewBuff) { - if ( (ulong_ptr)KsTsduBuf + sizeof(KS_TSDU_BUF) == - (ulong_ptr)KsTsdu + KsTsdu->LastOffset) { - KsTsdu->LastOffset -= sizeof(KS_TSDU_BUF); - KsTsduBuf->TsduType = 0; - } else { - cfs_enter_debugger(); - KsTsduBuf->StartOffset = KsTsduBuf->DataLength; - } - } else { - if ( (ulong_ptr)KsTsduDat + KsTsduDat->TotalLength == - (ulong_ptr)KsTsdu + KsTsdu->LastOffset) { - KsTsdu->LastOffset -= KsTsduDat->TotalLength; - KsTsduDat->TsduType = 0; - } else { - cfs_enter_debugger(); - KsTsduDat->StartOffset = KsTsduDat->DataLength; - } - } - } - } - - spin_unlock(&tconn->kstc_lock); - } - - /* free the context if is not used at all */ - if (context) { - cfs_free(context); - } - - ks_put_tconn(tconn); - - return rc; -} - -/* - * ks_recv_mdl - * Receive data from the peer for a stream connection - * - * Arguments: - * tconn: tdi connection object - * mdl: the mdl chain to contain the incoming data - * len: length of the data - * flags: flags of the receiving - * - * Return Value: - * ks return code - * - * Notes: - * N/A - */ - -int -ks_recv_mdl( - ksock_tconn_t * tconn, - ksock_mdl_t * mdl, - int size, - int flags - ) -{ - NTSTATUS Status = STATUS_SUCCESS; - int rc = 0; - - BOOLEAN bIsNonBlock; - BOOLEAN bIsExpedited; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_MDL KsTsduMdl; - - PUCHAR Buffer; - - ULONG BytesRecved = 0; - ULONG RecvedOnce; - - bIsNonBlock = 
cfs_is_flag_set(flags, MSG_DONTWAIT); - bIsExpedited = cfs_is_flag_set(flags, MSG_OOB); - - ks_get_tconn(tconn); - -Again: - - RecvedOnce = 0; - - spin_lock(&(tconn->kstc_lock)); - - if ( tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child) { - - rc = -EINVAL; - spin_unlock(&(tconn->kstc_lock)); - - goto errorout; - } - - if (tconn->kstc_state != ksts_connected) { - - rc = -ENOTCONN; - spin_unlock(&(tconn->kstc_lock)); - - goto errorout; - } - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - -NextTsdu: - - if (list_empty(&(KsTsduMgr->TsduList))) { - - // - // It's a notification event. We need reset it to - // un-signaled state in case there no any tsdus. - // - - KeResetEvent(&(KsTsduMgr->Event)); - - } else { - - KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - /* remove the KsTsdu from TsduMgr list to release the lock */ - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; - - spin_unlock(&(tconn->kstc_lock)); - - while ((ULONG)size > BytesRecved) { - - ULONG BytesCopied = 0; - ULONG BytesToCopy = 0; - ULONG StartOffset = 0; - - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - - if ( TSDU_TYPE_DAT == KsTsduDat->TsduType || - TSDU_TYPE_BUF == KsTsduBuf->TsduType ) { - - - // - // Data Tsdu Unit ... 
- // - - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - - if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) { - /* data is not ready yet*/ - KeResetEvent(&(KsTsduMgr->Event)); - printk("ks_recv_mdl: KsTsduDat (%xh) is not ready yet !!!!!!!\n", KsTsduDat); - break; - } - - Buffer = &KsTsduDat->Data[0]; - StartOffset = KsTsduDat->StartOffset; - if (KsTsduDat->DataLength - KsTsduDat->StartOffset > size - BytesRecved) { - /* Recvmsg requst could be statisfied ... */ - BytesToCopy = size - BytesRecved; - } else { - BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset; - } - - } else { - - if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) { - /* data is not ready yet*/ - KeResetEvent(&(KsTsduMgr->Event)); - DbgPrint("ks_recv_mdl: KsTsduBuf (%xh) is not ready yet !!!!!!!\n", KsTsduBuf); - break; - } - - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - Buffer = KsTsduBuf->UserBuffer; - StartOffset = KsTsduBuf->StartOffset; - - if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > size - BytesRecved) { - /* Recvmsg requst could be statisfied ... 
*/ - BytesToCopy = size - BytesRecved; - } else { - BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset; - } - } - - if (BytesToCopy > 0) { - Status = TdiCopyBufferToMdl( - Buffer, - StartOffset, - BytesToCopy, - mdl, - BytesRecved, - &BytesCopied - ); - - if (NT_SUCCESS(Status)) { - - if (BytesToCopy != BytesCopied) { - cfs_enter_debugger(); - } - - BytesRecved += BytesCopied; - RecvedOnce += BytesCopied; - - } else { - - cfs_enter_debugger(); - - if (STATUS_BUFFER_OVERFLOW == Status) { - } - } - } - - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - - KsTsduDat->StartOffset += BytesCopied; - - if (KsTsduDat->StartOffset == KsTsduDat->DataLength) { - KsTsdu->StartOffset += KsTsduDat->TotalLength; - } - - } else { - - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - KsTsduBuf->StartOffset += BytesCopied; - if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) { - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - /* now we need release the buf to system pool */ - ExFreePool(KsTsduBuf->UserBuffer); - } - } - - } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { - - // - // MDL Tsdu Unit ... - // - - if (KsTsduMdl->DataLength > size - BytesRecved) { - - /* Recvmsg requst could be statisfied ... 
*/ - - BytesToCopy = size - BytesRecved; - - } else { - - BytesToCopy = KsTsduMdl->DataLength; - } - - Status = KsCopyMdlChainToMdlChain( - KsTsduMdl->Mdl, - KsTsduMdl->StartOffset, - mdl, - BytesRecved, - BytesToCopy, - &BytesCopied - ); - - if (NT_SUCCESS(Status)) { - - if (BytesToCopy != BytesCopied) { - cfs_enter_debugger(); - } - - KsTsduMdl->StartOffset += BytesCopied; - KsTsduMdl->DataLength -= BytesCopied; - - BytesRecved += BytesCopied; - RecvedOnce += BytesCopied; - } else { - cfs_enter_debugger(); - } - - if (0 == KsTsduMdl->DataLength) { - - // - // Call TdiReturnChainedReceives to release the Tsdu memory - // - - TdiReturnChainedReceives( - &(KsTsduMdl->Descriptor), - 1 ); - - KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); - } - - } else { - printk("ks_recv_mdl: unknown tsdu slot: slot = %x type = %x Start= %x\n", - KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength); - printk(" Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x", - KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength); - cfs_enter_debugger(); - } - - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { - - // - // KsTsdu is empty now, we need free it ... 
- // - - KsPutKsTsdu(KsTsdu); - KsTsdu = NULL; - - break; - } - } - - spin_lock(&(tconn->kstc_lock)); - - /* we need attach the KsTsdu to the list header */ - if (KsTsdu) { - KsTsduMgr->NumOfTsdu++; - list_add(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - } else if ((ULONG)size > BytesRecved) { - goto NextTsdu; - } - } - - if (KsTsduMgr->TotalBytes < RecvedOnce) { - cfs_enter_debugger(); - KsTsduMgr->TotalBytes = 0; - } else { - KsTsduMgr->TotalBytes -= RecvedOnce; - } - - spin_unlock(&(tconn->kstc_lock)); - - if (NT_SUCCESS(Status)) { - - if ((BytesRecved < (ulong_ptr)size) && (!bIsNonBlock)) { - - KeWaitForSingleObject( - &(KsTsduMgr->Event), - Executive, - KernelMode, - FALSE, - NULL - ); - - goto Again; - } - - if (bIsNonBlock && (BytesRecved == 0)) { - rc = -EAGAIN; - } else { - rc = BytesRecved; - } - } - -errorout: - - ks_put_tconn(tconn); - - if (rc > 0) { - KsPrint((1, "ks_recv_mdl: recvieving %d bytes ...\n", rc)); - } else { - KsPrint((0, "ks_recv_mdl: recvieving error code = %d Stauts = %xh ...\n", rc, Status)); - } - - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); - - return (rc); -} - - -/* - * ks_init_tdi_data - * initialize the global data in ksockal_data - * - * Arguments: - * N/A - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ - -int -ks_init_tdi_data() -{ - int rc = 0; - - /* initialize tconn related globals */ - RtlZeroMemory(&ks_data, sizeof(ks_data_t)); - - spin_lock_init(&ks_data.ksnd_tconn_lock); - CFS_INIT_LIST_HEAD(&ks_data.ksnd_tconns); - cfs_init_event(&ks_data.ksnd_tconn_exit, TRUE, FALSE); - - ks_data.ksnd_tconn_slab = cfs_mem_cache_create( - "tcon", sizeof(ksock_tconn_t) , 0, 0); - - if (!ks_data.ksnd_tconn_slab) { - rc = -ENOMEM; - goto errorout; - } - - /* initialize tsdu related globals */ - - spin_lock_init(&ks_data.ksnd_tsdu_lock); - CFS_INIT_LIST_HEAD(&ks_data.ksnd_freetsdus); - ks_data.ksnd_tsdu_size = TDINAL_TSDU_DEFAULT_SIZE; /* 64k */ - ks_data.ksnd_tsdu_slab = cfs_mem_cache_create( - 
"tsdu", ks_data.ksnd_tsdu_size, 0, 0); - - if (!ks_data.ksnd_tsdu_slab) { - rc = -ENOMEM; - cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); - ks_data.ksnd_tconn_slab = NULL; - goto errorout; - } - - /* initialize daemon related globals */ - - spin_lock_init(&ks_data.ksnd_daemon_lock); - CFS_INIT_LIST_HEAD(&ks_data.ksnd_daemons); - cfs_init_event(&ks_data.ksnd_daemon_exit, TRUE, FALSE); - - KsRegisterPnpHandlers(); - -errorout: - - return rc; -} - - -/* - * ks_fini_tdi_data - * finalize the global data in ksockal_data - * - * Arguments: - * N/A - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ - -void -ks_fini_tdi_data() -{ - PKS_TSDU KsTsdu = NULL; - struct list_head * list = NULL; - - /* clean up the pnp handler and address slots */ - KsDeregisterPnpHandlers(); - - /* we need wait until all the tconn are freed */ - spin_lock(&(ks_data.ksnd_tconn_lock)); - - if (list_empty(&(ks_data.ksnd_tconns))) { - cfs_wake_event(&ks_data.ksnd_tconn_exit); - } - spin_unlock(&(ks_data.ksnd_tconn_lock)); - - /* now wait on the tconn exit event */ - cfs_wait_event(&ks_data.ksnd_tconn_exit, 0); - - /* it's safe to delete the tconn slab ... */ - cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); - ks_data.ksnd_tconn_slab = NULL; - - /* clean up all the tsud buffers in the free list */ - spin_lock(&(ks_data.ksnd_tsdu_lock)); - list_for_each (list, &ks_data.ksnd_freetsdus) { - KsTsdu = list_entry (list, KS_TSDU, Link); - - cfs_mem_cache_free( - ks_data.ksnd_tsdu_slab, - KsTsdu ); - } - spin_unlock(&(ks_data.ksnd_tsdu_lock)); - - /* it's safe to delete the tsdu slab ... */ - cfs_mem_cache_destroy(ks_data.ksnd_tsdu_slab); - ks_data.ksnd_tsdu_slab = NULL; - - /* good! 
it's smooth to do the cleaning up...*/ -} - -/* - * ks_create_child_tconn - * Create the backlog child connection for a listener - * - * Arguments: - * parent: the listener daemon connection - * - * Return Value: - * the child connection or NULL in failure - * - * Notes: - * N/A - */ - -ksock_tconn_t * -ks_create_child_tconn( - ksock_tconn_t * parent - ) -{ - NTSTATUS status; - ksock_tconn_t * backlog; - - /* allocate the tdi connecton object */ - backlog = ks_create_tconn(); - - if (!backlog) { - goto errorout; - } - - /* initialize the tconn as a child */ - ks_init_child(backlog); - - - /* now bind it */ - if (ks_bind_tconn(backlog, parent, 0, 0) < 0) { - ks_free_tconn(backlog); - backlog = NULL; - goto errorout; - } - - /* open the connection object */ - status = KsOpenConnection( - &(backlog->kstc_dev), - (PVOID)backlog, - &(backlog->child.kstc_info.Handle), - &(backlog->child.kstc_info.FileObject) - ); - - if (!NT_SUCCESS(status)) { - - ks_put_tconn(backlog); - backlog = NULL; - cfs_enter_debugger(); - goto errorout; - } - - /* associate it now ... */ - status = KsAssociateAddress( - backlog->kstc_addr.Handle, - backlog->child.kstc_info.FileObject - ); - - if (!NT_SUCCESS(status)) { - - ks_put_tconn(backlog); - backlog = NULL; - cfs_enter_debugger(); - goto errorout; - } - - backlog->kstc_state = ksts_associated; - -errorout: - - return backlog; -} - -/* - * ks_replenish_backlogs( - * to replenish the backlogs listening... 
- * - * Arguments: - * tconn: the parent listen tdi connect - * nbacklog: number fo child connections in queue - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ - -void -ks_replenish_backlogs( - ksock_tconn_t * parent, - int nbacklog - ) -{ - ksock_tconn_t * backlog; - int n = 0; - - /* calculate how many backlogs needed */ - if ( ( parent->listener.kstc_listening.num + - parent->listener.kstc_accepted.num ) < nbacklog ) { - n = nbacklog - ( parent->listener.kstc_listening.num + - parent->listener.kstc_accepted.num ); - } else { - n = 0; - } - - while (n--) { - - /* create the backlog child tconn */ - backlog = ks_create_child_tconn(parent); - - spin_lock(&(parent->kstc_lock)); - - if (backlog) { - spin_lock(&backlog->kstc_lock); - /* attch it into the listing list of daemon */ - list_add( &backlog->child.kstc_link, - &parent->listener.kstc_listening.list ); - parent->listener.kstc_listening.num++; - - backlog->child.kstc_queued = TRUE; - spin_unlock(&backlog->kstc_lock); - } else { - cfs_enter_debugger(); - } - - spin_unlock(&(parent->kstc_lock)); - } -} - -/* - * ks_start_listen - * setup the listener tdi connection and make it listen - * on the user specified ip address and port. - * - * Arguments: - * tconn: the parent listen tdi connect - * nbacklog: number fo child connections in queue - * - * Return Value: - * ks error code >=: success; otherwise error. 
- * - * Notes: - * N/A - */ - -int -ks_start_listen(ksock_tconn_t *tconn, int nbacklog) -{ - int rc = 0; - - /* now replenish the backlogs */ - ks_replenish_backlogs(tconn, nbacklog); - - /* set the event callback handlers */ - rc = ks_set_handlers(tconn); - - if (rc < 0) { - return rc; - } - - spin_lock(&(tconn->kstc_lock)); - tconn->listener.nbacklog = nbacklog; - tconn->kstc_state = ksts_listening; - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED); - spin_unlock(&(tconn->kstc_lock)); - - return rc; -} - -void -ks_stop_listen(ksock_tconn_t *tconn) -{ - struct list_head * list; - ksock_tconn_t * backlog; - - /* reset all tdi event callbacks to NULL */ - ks_reset_handlers (tconn); - - spin_lock(&tconn->kstc_lock); - - cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED); - - /* cleanup all the listening backlog child connections */ - list_for_each (list, &(tconn->listener.kstc_listening.list)) { - backlog = list_entry(list, ksock_tconn_t, child.kstc_link); - - /* destory and free it */ - ks_put_tconn(backlog); - } - - spin_unlock(&tconn->kstc_lock); - - /* wake up it from the waiting on new incoming connections */ - KeSetEvent(&tconn->listener.kstc_accept_event, 0, FALSE); - - /* free the listening daemon tconn */ - ks_put_tconn(tconn); -} - - -/* - * ks_wait_child_tconn - * accept a child connection from peer - * - * Arguments: - * parent: the daemon tdi connection listening - * child: to contain the accepted connection - * - * Return Value: - * ks error code; - * - * Notes: - * N/A - */ - -int -ks_wait_child_tconn( - ksock_tconn_t * parent, - ksock_tconn_t ** child - ) -{ - struct list_head * tmp; - ksock_tconn_t * backlog = NULL; - - ks_replenish_backlogs(parent, parent->listener.nbacklog); - - spin_lock(&(parent->kstc_lock)); - - if (parent->listener.kstc_listening.num <= 0) { - spin_unlock(&(parent->kstc_lock)); - return -1; - } - -again: - - /* check the listening queue and try to search the accepted connecton */ - - list_for_each(tmp, 
&(parent->listener.kstc_listening.list)) { - backlog = list_entry (tmp, ksock_tconn_t, child.kstc_link); - - spin_lock(&(backlog->kstc_lock)); - - if (backlog->child.kstc_accepted) { - - LASSERT(backlog->kstc_state == ksts_connected); - LASSERT(backlog->child.kstc_busy); - - list_del(&(backlog->child.kstc_link)); - list_add(&(backlog->child.kstc_link), - &(parent->listener.kstc_accepted.list)); - parent->listener.kstc_accepted.num++; - parent->listener.kstc_listening.num--; - backlog->child.kstc_queueno = 1; - - spin_unlock(&(backlog->kstc_lock)); - - break; - } else { - spin_unlock(&(backlog->kstc_lock)); - backlog = NULL; - } - } - - spin_unlock(&(parent->kstc_lock)); - - /* we need wait until new incoming connections are requested - or the case of shuting down the listenig daemon thread */ - if (backlog == NULL) { - - NTSTATUS Status; - - Status = KeWaitForSingleObject( - &(parent->listener.kstc_accept_event), - Executive, - KernelMode, - FALSE, - NULL - ); - - spin_lock(&(parent->kstc_lock)); - - /* check whether it's exptected to exit ? 
*/ - if (!cfs_is_flag_set(parent->kstc_flags, KS_TCONN_DAEMON_STARTED)) { - spin_unlock(&(parent->kstc_lock)); - } else { - goto again; - } - } - - if (backlog) { - /* query the local ip address of the connection */ - ks_query_local_ipaddr(backlog); - } - - *child = backlog; - - return 0; -} - -int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) -{ - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - - spin_lock(&ks_data.ksnd_addrs_lock); - - list = ks_data.ksnd_addrs_list.Flink; - while (list != &ks_data.ksnd_addrs_list) { - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - if (_stricmp(name, &slot->iface[0]) == 0) { - *up = slot->up; - *ip = slot->ip_addr; - *mask = slot->netmask; - break; - } - list = list->Flink; - slot = NULL; - } - - spin_unlock(&ks_data.ksnd_addrs_lock); - - return (int)(slot == NULL); -} - -int libcfs_ipif_enumerate(char ***names) -{ - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - int nips = 0; - - spin_lock(&ks_data.ksnd_addrs_lock); - - *names = cfs_alloc(sizeof(char *) * ks_data.ksnd_naddrs, CFS_ALLOC_ZERO); - if (*names == NULL) { - goto errorout; - } - - list = ks_data.ksnd_addrs_list.Flink; - while (list != &ks_data.ksnd_addrs_list) { - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - list = list->Flink; - (*names)[nips++] = slot->iface; - cfs_assert(nips <= ks_data.ksnd_naddrs); - } - - cfs_assert(nips == ks_data.ksnd_naddrs); - -errorout: - - spin_unlock(&ks_data.ksnd_addrs_lock); - return nips; -} - -void libcfs_ipif_free_enumeration(char **names, int n) -{ - if (names) { - cfs_free(names); - } -} - -int libcfs_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog) -{ - int rc = 0; - ksock_tconn_t * parent; - - parent = ks_create_tconn(); - if (!parent) { - rc = -ENOMEM; - goto errorout; - } - - /* initialize the tconn as a listener */ - ks_init_listener(parent); - - /* bind the daemon->tconn */ - rc = ks_bind_tconn(parent, NULL, ip, (unsigned short)port); - - if (rc < 
0) { - ks_free_tconn(parent); - goto errorout; - } - - /* create listening children and make it to listen state*/ - rc = ks_start_listen(parent, backlog); - if (rc < 0) { - ks_stop_listen(parent); - goto errorout; - } - - *sockp = parent; - -errorout: - - return rc; -} - -int libcfs_sock_accept(struct socket **newsockp, struct socket *sock) -{ - /* wait for incoming connecitons */ - return ks_wait_child_tconn(sock, newsockp); -} - -void libcfs_sock_abort_accept(struct socket *sock) -{ - LASSERT(sock->kstc_type == kstt_listener); - - spin_lock(&(sock->kstc_lock)); - - /* clear the daemon flag */ - cfs_clear_flag(sock->kstc_flags, KS_TCONN_DAEMON_STARTED); - - /* wake up it from the waiting on new incoming connections */ - KeSetEvent(&sock->listener.kstc_accept_event, 0, FALSE); - - spin_unlock(&(sock->kstc_lock)); -} - -/* - * libcfs_sock_connect - * build a conntion between local ip/port and the peer ip/port. - * - * Arguments: - * laddr: local ip address - * lport: local port number - * paddr: peer's ip address - * pport: peer's port number - * - * Return Value: - * int: return code ... 
- * - * Notes: - * N/A - */ - - -int libcfs_sock_connect(struct socket **sockp, int *fatal, - __u32 local_ip, int local_port, - __u32 peer_ip, int peer_port) -{ - ksock_tconn_t * tconn = NULL; - int rc = 0; - - *sockp = NULL; - - KsPrint((1, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n", - peer_ip, peer_port, local_ip, local_port )); - - /* create the tdi connecion structure */ - tconn = ks_create_tconn(); - if (!tconn) { - rc = -ENOMEM; - goto errorout; - } - - /* initialize the tdi sender connection */ - ks_init_sender(tconn); - - /* bind the local ip address with the tconn */ - rc = ks_bind_tconn(tconn, NULL, local_ip, (unsigned short)local_port); - if (rc < 0) { - KsPrint((0, "libcfs_sock_connect: failed to bind address %x:%d...\n", - local_ip, local_port )); - ks_free_tconn(tconn); - goto errorout; - } - - /* connect to the remote peer */ - rc = ks_build_tconn(tconn, peer_ip, (unsigned short)peer_port); - if (rc < 0) { - KsPrint((0, "libcfs_sock_connect: failed to connect %x:%d ...\n", - peer_ip, peer_port )); - - ks_put_tconn(tconn); - goto errorout; - } - - *sockp = tconn; - -errorout: - - return rc; -} - -int libcfs_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize) -{ - return 0; -} - -int libcfs_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize) -{ - return 0; -} - -int libcfs_sock_getaddr(struct socket *socket, int remote, __u32 *ip, int *port) -{ - PTRANSPORT_ADDRESS taddr = NULL; - - spin_lock(&socket->kstc_lock); - if (remote) { - if (socket->kstc_type == kstt_sender) { - taddr = socket->sender.kstc_info.Remote; - } else if (socket->kstc_type == kstt_child) { - taddr = socket->child.kstc_info.Remote; - } - } else { - taddr = &(socket->kstc_addr.Tdi); - } - - if (taddr) { - PTDI_ADDRESS_IP addr = (PTDI_ADDRESS_IP)(&(taddr->Address[0].Address)); - if (ip != NULL) - *ip = ntohl (addr->in_addr); - if (port != NULL) - *port = ntohs (addr->sin_port); - } else { - spin_unlock(&socket->kstc_lock); - return -ENOTCONN; 
- } - - spin_unlock(&socket->kstc_lock); - return 0; -} - -int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - ksock_mdl_t * mdl; - - int offset = 0; - - while (nob > offset) { - - /* lock the user buffer */ - rc = ks_lock_buffer( (char *)buffer + offset, - FALSE, nob - offset, IoReadAccess, &mdl ); - - if (rc < 0) { - return (rc); - } - - /* send out the whole mdl */ - rc = ks_send_mdl( sock, NULL, mdl, nob - offset, 0 ); - - if (rc > 0) { - offset += rc; - } else { - return (rc); - } - } - - return (0); -} - -int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout) -{ - int rc; - ksock_mdl_t * mdl; - - int offset = 0; - - while (nob > offset) { - - /* lock the user buffer */ - rc = ks_lock_buffer( (char *)buffer + offset, - FALSE, nob - offset, IoWriteAccess, &mdl ); - - if (rc < 0) { - return (rc); - } - - /* recv the requested buffer */ - rc = ks_recv_mdl( sock, mdl, nob - offset, 0 ); - - if (rc > 0) { - offset += rc; - } else { - return (rc); - } - } - - return (0); -} - -void libcfs_sock_release(struct socket *sock) -{ - if (sock->kstc_type == kstt_listener && - sock->kstc_state == ksts_listening) { - ks_stop_listen(sock); - } else { - ks_put_tconn(sock); - } -} diff --git a/lnet/libcfs/winnt/winnt-tracefile.c b/lnet/libcfs/winnt/winnt-tracefile.c deleted file mode 100644 index d172bff1909930091908d574d3f8947c782e10b8..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-tracefile.c +++ /dev/null @@ -1,300 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. 
Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#define LUSTRE_TRACEFILE_PRIVATE - -#include <libcfs/libcfs.h> -#include <libcfs/kp30.h> -#include "tracefile.h" - -#ifndef get_cpu -#define get_cpu() smp_processor_id() -#define put_cpu() do { } while (0) -#endif - -extern union trace_data_union trace_data[NR_CPUS]; -extern char *tracefile; -extern int64_t tracefile_size; - -event_t tracefile_event; - -void tracefile_init_arch() -{ - int i; - int j; - - cfs_init_event(&tracefile_event, TRUE, TRUE); - - memset(trace_console_buffers, 0, sizeof(trace_console_buffers)); - - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; j < 1; j++) { - trace_console_buffers[i][j] = - cfs_alloc(TRACE_CONSOLE_BUFFER_SIZE, - CFS_ALLOC_ZERO); - - if (trace_console_buffers[i][j] == NULL) { - tracefile_fini_arch(); - KsPrint((0, "Can't allocate console message buffer\n")); - return -ENOMEM; - } - } - } - - return 0; -} - -void tracefile_fini_arch() -{ - int i; - int j; - - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; j < 2; j++) { - if (trace_console_buffers[i][j] != NULL) { - cfs_free(trace_console_buffers[i][j]); - trace_console_buffers[i][j] = NULL; - } - } - } -} - -void tracefile_read_lock() -{ - cfs_wait_event(&tracefile_event, 0); -} - -void tracefile_read_unlock() -{ - cfs_wake_event(&tracefile_event); -} - -void tracefile_write_lock() -{ - cfs_wait_event(&tracefile_event, 0); -} - -void tracefile_write_unlock() -{ - cfs_wake_event(&tracefile_event); -} - -char * -trace_get_console_buffer(void) -{ -#pragma message ("is there possible problem with pre-emption ?") - int cpu 
= (int) KeGetCurrentProcessorNumber(); - return trace_console_buffers[cpu][0]; -} - -void -trace_put_console_buffer(char *buffer) -{ -} - -struct trace_cpu_data * -trace_get_tcd(void) -{ -#pragma message("todo: return NULL if in interrupt context") - - int cpu = (int) KeGetCurrentProcessorNumber(); - return &trace_data[cpu].tcd; -} - -void -trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags) -{ -} - -void -set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, - const int line, unsigned long stack) -{ - struct timeval tv; - - do_gettimeofday(&tv); - - header->ph_subsys = subsys; - header->ph_mask = mask; - header->ph_cpu_id = smp_processor_id(); - header->ph_sec = (__u32)tv.tv_sec; - header->ph_usec = tv.tv_usec; - header->ph_stack = stack; - header->ph_pid = current->pid; - header->ph_line_num = line; - header->ph_extern_pid = 0; - return; -} - -void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn) -{ - char *prefix = NULL, *ptype = NULL; - - if ((mask & D_EMERG) != 0) { - prefix = "LustreError"; - ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { - prefix = "LustreError"; - ptype = KERN_ERR; - } else if ((mask & D_WARNING) != 0) { - prefix = "Lustre"; - ptype = KERN_WARNING; - } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) { - prefix = "Lustre"; - ptype = KERN_INFO; - } - - if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %s", ptype, prefix, buf); - } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %s", ptype, prefix, hdr->ph_pid, - hdr->ph_extern_pid, file, hdr->ph_line_num, fn, buf); - } - return; -} - -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) -{ - return 1; -} - - -int trace_write_daemon_file(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - char *name; - unsigned long off; - int rc; - - name =cfs_alloc(count + 1, 0); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user((void 
*)name, (void*)buffer, count)) { - rc = -EFAULT; - goto out; - } - - /* be nice and strip out trailing '\n' */ - for (off = count ; off > 2 && isspace(name[off - 1]); off--) - ; - - name[off] = '\0'; - - tracefile_write_lock(); - if (strcmp(name, "stop") == 0) { - tracefile = NULL; - trace_stop_thread(); - goto out_sem; - } else if (strncmp(name, "size=", 5) == 0) { - tracefile_size = simple_strtoul(name + 5, NULL, 0); - if (tracefile_size < 10 || tracefile_size > 20480) - tracefile_size = TRACEFILE_SIZE; - else - tracefile_size <<= 20; - goto out_sem; - } - - if (tracefile != NULL) - cfs_free(tracefile); - - tracefile = name; - name = NULL; - printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " - "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10)); - - trace_start_thread(); -out_sem: - tracefile_write_unlock(); -out: - if (name != NULL) - cfs_free(name); - return count; -} - -int trace_read_daemon_file(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - int rc; - - tracefile_read_lock(); - rc = snprintf(page, count, "%s", tracefile); - tracefile_read_unlock(); - - return rc; -} - -int trace_write_debug_mb(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - char string[32]; - int i; - unsigned max; - - if (count >= sizeof(string)) { - printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n", - count); - return -EOVERFLOW; - } - - if (copy_from_user((void *)string, (void *)buffer, count)) - return -EFAULT; - - max = simple_strtoul(string, NULL, 0); - if (max == 0) - return -EINVAL; - - if (max > (num_physpages >> (20 - 2 - CFS_PAGE_SHIFT)) / 5 || max >= 512) { - printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " - "%dMB, which is more than 80%% of available RAM (%lu)\n", - max, (num_physpages >> (20 - 2 - CFS_PAGE_SHIFT)) / 5); - return -EINVAL; - } - - max /= smp_num_cpus; - - for (i = 0; i < NR_CPUS; i++) { - struct trace_cpu_data *tcd; - tcd = &trace_data[i].tcd; 
- tcd->tcd_max_pages = max << (20 - CFS_PAGE_SHIFT); - } - return count; -} - -int trace_read_debug_mb(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct trace_cpu_data *tcd; - int rc; - - tcd = trace_get_tcd(); - LASSERT (tcd != NULL); - rc = snprintf(page, count, "%lu\n", - (tcd->tcd_max_pages >> (20 - CFS_PAGE_SHIFT)) * smp_num_cpus); - trace_put_tcd(tcd); - return rc; -} - -void -trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) -{ -#error "tbd" -} - diff --git a/lnet/libcfs/winnt/winnt-usr.c b/lnet/libcfs/winnt/winnt-usr.c deleted file mode 100644 index f79347b8893ba48aa9e9399892f5c2195985b681..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-usr.c +++ /dev/null @@ -1,85 +0,0 @@ - -#ifndef __KERNEL__ - -#include <stdio.h> -#include <stdlib.h> -#include <io.h> -#include <time.h> -#include <windows.h> - -void portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, - char *format, ...) { - } - -int cfs_proc_mknod(const char *path, unsigned short mode, unsigned int dev) -{ - return 0; -} - - -void print_last_error(char* Prefix) -{ - LPVOID lpMsgBuf; - - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - GetLastError(), - 0, - (LPTSTR) &lpMsgBuf, - 0, - NULL - ); - - printf("%s %s", Prefix, (LPTSTR) lpMsgBuf); - - LocalFree(lpMsgBuf); -} - -// -// The following declarations are defined in io.h of VC -// sys/types.h will conflict with io.h, so we need place -// these declartions here. 
- -#ifdef __cplusplus -extern "C" { -#endif - void - __declspec (naked) __cdecl _chkesp(void) - { -#if _X86_ - __asm { jz exit_chkesp }; - __asm { int 3 }; - exit_chkesp: - __asm { ret }; -#endif - } -#ifdef __cplusplus -} -#endif - -unsigned int sleep (unsigned int seconds) -{ - Sleep(seconds * 1000); - return 0; -} - -int gethostname(char * name, int namelen) -{ - return 0; -} - -int ioctl ( - int handle, - int cmd, - void *buffer - ) -{ - printf("hello, world\n"); - return 0; -} - -#endif /* __KERNEL__ */ \ No newline at end of file diff --git a/lnet/libcfs/winnt/winnt-utils.c b/lnet/libcfs/winnt/winnt-utils.c deleted file mode 100644 index cd33aa2a0dc5b28a0f5f33dbda37628fa9d2fdac..0000000000000000000000000000000000000000 --- a/lnet/libcfs/winnt/winnt-utils.c +++ /dev/null @@ -1,158 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (c) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or modify it under - * the terms of version 2 of the GNU General Public License as published by - * the Free Software Foundation. Lustre is distributed in the hope that it - * will be useful, but WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. You should have received a - * copy of the GNU General Public License along with Lustre; if not, write - * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, - * USA. - */ - - -/* - * miscellaneous libcfs stuff - */ -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/types.h> - -/* - * Convert server error code to client format. Error codes are from - * Linux errno.h, so for Linux client---identity. 
- */ -int convert_server_error(__u64 ecode) -{ - return cfs_error_code((NTSTATUS)ecode); -} - -/* - * convert <fcntl.h> flag from client to server. - * - * nt kernel uses several members to describe the open flags - * such as DesiredAccess/ShareAccess/CreateDisposition/CreateOptions - * so it's better to convert when using, not here. - */ - -int convert_client_oflag(int cflag, int *result) -{ - *result = 0; - return 0; -} - - -int cfs_error_code(NTSTATUS Status) -{ - switch (Status) { - - case STATUS_ACCESS_DENIED: - return (-EACCES); - - case STATUS_ACCESS_VIOLATION: - return (-EFAULT); - - case STATUS_BUFFER_TOO_SMALL: - return (-ETOOSMALL); - - case STATUS_INVALID_PARAMETER: - return (-EINVAL); - - case STATUS_NOT_IMPLEMENTED: - case STATUS_NOT_SUPPORTED: - return (-EOPNOTSUPP); - - case STATUS_INVALID_ADDRESS: - case STATUS_INVALID_ADDRESS_COMPONENT: - return (-EADDRNOTAVAIL); - - case STATUS_NO_SUCH_DEVICE: - case STATUS_NO_SUCH_FILE: - case STATUS_OBJECT_NAME_NOT_FOUND: - case STATUS_OBJECT_PATH_NOT_FOUND: - case STATUS_NETWORK_BUSY: - case STATUS_INVALID_NETWORK_RESPONSE: - case STATUS_UNEXPECTED_NETWORK_ERROR: - return (-ENETDOWN); - - case STATUS_BAD_NETWORK_PATH: - case STATUS_NETWORK_UNREACHABLE: - case STATUS_PROTOCOL_UNREACHABLE: - return (-ENETUNREACH); - - case STATUS_LOCAL_DISCONNECT: - case STATUS_TRANSACTION_ABORTED: - case STATUS_CONNECTION_ABORTED: - return (-ECONNABORTED); - - case STATUS_REMOTE_DISCONNECT: - case STATUS_LINK_FAILED: - case STATUS_CONNECTION_DISCONNECTED: - case STATUS_CONNECTION_RESET: - case STATUS_PORT_UNREACHABLE: - return (-ECONNRESET); - - case STATUS_PAGEFILE_QUOTA: - case STATUS_NO_MEMORY: - case STATUS_CONFLICTING_ADDRESSES: - case STATUS_QUOTA_EXCEEDED: - case STATUS_TOO_MANY_PAGING_FILES: - case STATUS_INSUFFICIENT_RESOURCES: - case STATUS_WORKING_SET_QUOTA: - case STATUS_COMMITMENT_LIMIT: - case STATUS_TOO_MANY_ADDRESSES: - case STATUS_REMOTE_RESOURCES: - return (-ENOBUFS); - - case STATUS_INVALID_CONNECTION: - 
return (-ENOTCONN); - - case STATUS_PIPE_DISCONNECTED: - return (-ESHUTDOWN); - - case STATUS_TIMEOUT: - case STATUS_IO_TIMEOUT: - case STATUS_LINK_TIMEOUT: - return (-ETIMEDOUT); - - case STATUS_REMOTE_NOT_LISTENING: - case STATUS_CONNECTION_REFUSED: - return (-ECONNREFUSED); - - case STATUS_HOST_UNREACHABLE: - return (-EHOSTUNREACH); - - case STATUS_PENDING: - case STATUS_DEVICE_NOT_READY: - return (-EAGAIN); - - case STATUS_CANCELLED: - case STATUS_REQUEST_ABORTED: - return (-EINTR); - - case STATUS_BUFFER_OVERFLOW: - case STATUS_INVALID_BUFFER_SIZE: - return (-EMSGSIZE); - - } - - if (NT_SUCCESS(Status)) - return 0; - - return (-EINVAL); -} - - -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) -{ -} - -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) -{ - return NULL; -} diff --git a/lnet/lnet/.cvsignore b/lnet/lnet/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/lnet/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/lnet/Info.plist b/lnet/lnet/Info.plist deleted file mode 100644 index 2b3967f0ff7429418bdbccc1ab3d3afd302e22a9..0000000000000000000000000000000000000000 --- a/lnet/lnet/Info.plist +++ /dev/null @@ -1,37 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>English</string> - <key>CFBundleExecutable</key> - <string>lnet</string> - <key>CFBundleIconFile</key> - <string></string> - <key>CFBundleIdentifier</key> - <string>com.clusterfs.lustre.lnet</string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - <key>CFBundlePackageType</key> - <string>KEXT</string> - <key>CFBundleSignature</key> - <string>????</string> - 
<key>CFBundleVersion</key> - <string>1.0.1</string> - <key>OSBundleCompatibleVersion</key> - <string>1.0.0</string> - <key>OSBundleLibraries</key> - <dict> - <key>com.apple.kpi.bsd</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.libkern</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.mach</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.unsupported</key> - <string>8.0.0b1</string> - <key>com.clusterfs.lustre.libcfs</key> - <string>1.0.0</string> - </dict> -</dict> -</plist> diff --git a/lnet/lnet/Makefile.in b/lnet/lnet/Makefile.in deleted file mode 100644 index 3bc86f6577534f02fe4dfc502eb592411f57d41a..0000000000000000000000000000000000000000 --- a/lnet/lnet/Makefile.in +++ /dev/null @@ -1,10 +0,0 @@ -MODULES := lnet - -lnet-objs := api-errno.o api-ni.o config.o -lnet-objs += lib-me.o lib-msg.o lib-eq.o lib-md.o -lnet-objs += lib-move.o module.o lo.o -lnet-objs += router.o router_proc.o acceptor.o peer.o - -default: all - -@INCLUDE_RULES@ diff --git a/lnet/lnet/acceptor.c b/lnet/lnet/acceptor.c deleted file mode 100644 index cf38645fdc55ff8e953e150791d68164c6b9afd8..0000000000000000000000000000000000000000 --- a/lnet/lnet/acceptor.c +++ /dev/null @@ -1,537 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -#ifdef __KERNEL__ -static char *accept = "secure"; -CFS_MODULE_PARM(accept, "s", charp, 0444, - "Accept connections (secure|all|none)"); - -static int accept_port = 988; -CFS_MODULE_PARM(accept_port, "i", int, 0444, - "Acceptor's port (same on all nodes)"); - -static int accept_backlog = 127; -CFS_MODULE_PARM(accept_backlog, "i", int, 0444, - "Acceptor's listen backlog"); - -static int accept_timeout = 5; -CFS_MODULE_PARM(accept_timeout, "i", int, 0644, - "Acceptor's timeout (seconds)"); - -struct { - int pta_shutdown; - cfs_socket_t *pta_sock; - struct semaphore pta_signal; -} lnet_acceptor_state; - -int -lnet_acceptor_timeout(void) -{ - return accept_timeout; -} -EXPORT_SYMBOL(lnet_acceptor_timeout); - -int -lnet_acceptor_port(void) -{ - return accept_port; -} -EXPORT_SYMBOL(lnet_acceptor_port); - -void -lnet_connect_console_error (int rc, lnet_nid_t peer_nid, - __u32 peer_ip, int peer_port) -{ - switch (rc) { - /* "normal" errors */ - case -ECONNREFUSED: - CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u " - "on port %d was refused: " - "check that Lustre is running on that node.\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - case -EHOSTUNREACH: - case -ENETUNREACH: - CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u " - "was unreachable: the network or that node may " - "be down, or Lustre may be misconfigured.\n", - libcfs_nid2str(peer_nid), HIPQUAD(peer_ip)); - break; - case -ETIMEDOUT: - CDEBUG(D_NETERROR, "Connection to %s at host %u.%u.%u.%u on " - "port %d took too long: that node may be hung " - "or experiencing high load.\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - case -ECONNRESET: - LCONSOLE_ERROR("Connection to %s 
at host %u.%u.%u.%u on " - "port %d was reset: " - "is it running a compatible version of Lustre " - "and is %s one of its NIDs?\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port, - libcfs_nid2str(peer_nid)); - break; - case -EPROTO: - LCONSOLE_ERROR("Protocol error connecting to %s at host " - "%u.%u.%u.%u on port %d: " - "is it running a compatible version of Lustre?\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - case -EADDRINUSE: - LCONSOLE_ERROR("No privileged ports available to connect to " - "%s at host %u.%u.%u.%u on port %d\n", - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - default: - LCONSOLE_ERROR("Unexpected error %d connecting to %s at " - "host %u.%u.%u.%u on port %d\n", rc, - libcfs_nid2str(peer_nid), - HIPQUAD(peer_ip), peer_port); - break; - } -} -EXPORT_SYMBOL(lnet_connect_console_error); - -int -lnet_connect(cfs_socket_t **sockp, lnet_nid_t peer_nid, - __u32 local_ip, __u32 peer_ip, int peer_port) -{ - lnet_acceptor_connreq_t cr; - cfs_socket_t *sock; - int rc; - int port; - int fatal; - - CLASSERT (sizeof(cr) <= 16); /* not too big to be on the stack */ - - for (port = LNET_ACCEPTOR_MAX_RESERVED_PORT; - port >= LNET_ACCEPTOR_MIN_RESERVED_PORT; - --port) { - /* Iterate through reserved ports. */ - - rc = libcfs_sock_connect(&sock, &fatal, - local_ip, port, - peer_ip, peer_port); - if (rc != 0) { - if (fatal) - goto failed; - continue; - } - - CLASSERT (LNET_PROTO_ACCEPTOR_VERSION == 1); - - if (the_lnet.ln_ptlcompat != 2) { - /* When portals compatibility is "strong", simply - * connect (i.e. send no acceptor connection request). - * Othewise send an acceptor connection request. I can - * have no portals peers so everyone else should - * understand my protocol. 
*/ - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - cr.acr_nid = peer_nid; - - if (the_lnet.ln_testprotocompat != 0) { - /* single-shot proto check */ - LNET_LOCK(); - if ((the_lnet.ln_testprotocompat & 4) != 0) { - cr.acr_version++; - the_lnet.ln_testprotocompat &= ~4; - } - if ((the_lnet.ln_testprotocompat & 8) != 0) { - cr.acr_magic = LNET_PROTO_MAGIC; - the_lnet.ln_testprotocompat &= ~8; - } - LNET_UNLOCK(); - } - - rc = libcfs_sock_write(sock, &cr, sizeof(cr), - accept_timeout); - if (rc != 0) - goto failed_sock; - } - - *sockp = sock; - return 0; - } - - rc = -EADDRINUSE; - goto failed; - - failed_sock: - libcfs_sock_release(sock); - failed: - lnet_connect_console_error(rc, peer_nid, peer_ip, peer_port); - return rc; -} -EXPORT_SYMBOL(lnet_connect); - -static inline int -lnet_accept_magic(__u32 magic, __u32 constant) -{ - return (magic == constant || - magic == __swab32(constant)); -} - -int -lnet_accept(lnet_ni_t *blind_ni, cfs_socket_t *sock, __u32 magic) -{ - lnet_acceptor_connreq_t cr; - __u32 peer_ip; - int peer_port; - int rc; - int flip; - lnet_ni_t *ni; - char *str; - - /* CAVEAT EMPTOR: I may be called by an LND in any thread's context if - * I passed the new socket "blindly" to the single NI that needed an - * acceptor. If so, blind_ni != NULL... */ - - LASSERT (sizeof(cr) <= 16); /* not too big for the stack */ - - rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port); - LASSERT (rc == 0); /* we succeeded before */ - - if (!lnet_accept_magic(magic, LNET_PROTO_ACCEPTOR_MAGIC)) { - - if (lnet_accept_magic(magic, LNET_PROTO_MAGIC)) { - /* future version compatibility! - * When LNET unifies protocols over all LNDs, the first - * thing sent will be a version query. 
I send back - * LNET_PROTO_ACCEPTOR_MAGIC to tell her I'm "old" */ - - memset (&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - rc = libcfs_sock_write(sock, &cr, sizeof(cr), - accept_timeout); - - if (rc != 0) - CERROR("Error sending magic+version in response" - "to LNET magic from %u.%u.%u.%u: %d\n", - HIPQUAD(peer_ip), rc); - return -EPROTO; - } - - if (magic == le32_to_cpu(LNET_PROTO_TCP_MAGIC)) - str = "'old' socknal/tcpnal"; - else if (lnet_accept_magic(magic, LNET_PROTO_RA_MAGIC)) - str = "'old' ranal"; - else if (lnet_accept_magic(magic, LNET_PROTO_OPENIB_MAGIC)) - str = "'old' openibnal"; - else - str = "unrecognised"; - - LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u magic %08x: " - " %s acceptor protocol\n", - HIPQUAD(peer_ip), magic, str); - return -EPROTO; - } - - flip = (magic != LNET_PROTO_ACCEPTOR_MAGIC); - - rc = libcfs_sock_read(sock, &cr.acr_version, - sizeof(cr.acr_version), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request version from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - return -EIO; - } - - if (flip) - __swab32s(&cr.acr_version); - - if (cr.acr_version != LNET_PROTO_ACCEPTOR_VERSION) { - /* future version compatibility! - * An acceptor-specific protocol rev will first send a version - * query. I send back my current version to tell her I'm - * "old". 
*/ - int peer_version = cr.acr_version; - - memset (&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - - rc = libcfs_sock_write(sock, &cr, sizeof(cr), - accept_timeout); - - if (rc != 0) - CERROR("Error sending magic+version in response" - "to version %d from %u.%u.%u.%u: %d\n", - peer_version, HIPQUAD(peer_ip), rc); - return -EPROTO; - } - - rc = libcfs_sock_read(sock, &cr.acr_nid, - sizeof(cr) - - offsetof(lnet_acceptor_connreq_t, acr_nid), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - return -EIO; - } - - if (flip) - __swab64s(&cr.acr_nid); - - ni = lnet_net2ni(LNET_NIDNET(cr.acr_nid)); - if (ni == NULL || /* no matching net */ - ni->ni_nid != cr.acr_nid) { /* right NET, wrong NID! */ - if (ni != NULL) - lnet_ni_decref(ni); - LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u for %s: " - " No matching NI\n", - HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - if (ni->ni_lnd->lnd_accept == NULL) { - /* This catches a request for the loopback LND */ - lnet_ni_decref(ni); - LCONSOLE_ERROR("Refusing connection from %u.%u.%u.%u for %s: " - " NI doesn not accept IP connections\n", - HIPQUAD(peer_ip), libcfs_nid2str(cr.acr_nid)); - return -EPERM; - } - - CDEBUG(D_NET, "Accept %s from %u.%u.%u.%u%s\n", - libcfs_nid2str(cr.acr_nid), HIPQUAD(peer_ip), - blind_ni == NULL ? "" : " (blind)"); - - if (blind_ni == NULL) { - /* called by the acceptor: call into the requested NI... */ - rc = ni->ni_lnd->lnd_accept(ni, sock); - } else { - /* portals_compatible set and the (only) NI called me to verify - * and skip the connection request... 
*/ - LASSERT (the_lnet.ln_ptlcompat != 0); - LASSERT (ni == blind_ni); - rc = 0; - } - - lnet_ni_decref(ni); - return rc; -} -EXPORT_SYMBOL(lnet_accept); - -int -lnet_acceptor(void *arg) -{ - char name[16]; - cfs_socket_t *newsock; - int rc; - int n_acceptor_nis; - __u32 magic; - __u32 peer_ip; - int peer_port; - lnet_ni_t *blind_ni = NULL; - int secure = (int)((unsigned long)arg); - - LASSERT (lnet_acceptor_state.pta_sock == NULL); - - if (the_lnet.ln_ptlcompat != 0) { - /* When portals_compatibility is enabled, peers may connect - * without sending an acceptor connection request. There is no - * ambiguity about which network the peer wants to connect to - * since there can only be 1 network, so I pass connections - * "blindly" to it. */ - n_acceptor_nis = lnet_count_acceptor_nis(&blind_ni); - LASSERT (n_acceptor_nis == 1); - LASSERT (blind_ni != NULL); - } - - snprintf(name, sizeof(name), "acceptor_%03d", accept_port); - cfs_daemonize(name); - cfs_block_allsigs(); - - rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock, - 0, accept_port, accept_backlog); - if (rc != 0) { - if (rc == -EADDRINUSE) - LCONSOLE_ERROR("Can't start acceptor on port %d: " - "port already in use\n", - accept_port); - else - LCONSOLE_ERROR("Can't start acceptor on port %d: " - "unexpected error %d\n", - accept_port, rc); - - lnet_acceptor_state.pta_sock = NULL; - } else { - LCONSOLE(0, "Accept %s, port %d%s\n", - accept, accept_port, - blind_ni == NULL ? 
"" : " (proto compatible)"); - } - - /* set init status and unblock parent */ - lnet_acceptor_state.pta_shutdown = rc; - mutex_up(&lnet_acceptor_state.pta_signal); - - if (rc != 0) - return rc; - - while (lnet_acceptor_state.pta_shutdown == 0) { - - rc = libcfs_sock_accept(&newsock, lnet_acceptor_state.pta_sock); - if (rc != 0) { - if (rc != -EAGAIN) { - CWARN("Accept error %d: pausing...\n", rc); - cfs_pause(cfs_time_seconds(1)); - } - continue; - } - - rc = libcfs_sock_getaddr(newsock, 1, &peer_ip, &peer_port); - if (rc != 0) { - CERROR("Can't determine new connection's address\n"); - goto failed; - } - - if (secure && peer_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) { - CERROR("Refusing connection from %u.%u.%u.%u: " - "insecure port %d\n", - HIPQUAD(peer_ip), peer_port); - goto failed; - } - - if (blind_ni != NULL) { - rc = blind_ni->ni_lnd->lnd_accept(blind_ni, newsock); - if (rc != 0) { - CERROR("NI %s refused 'blind' connection from " - "%u.%u.%u.%u\n", - libcfs_nid2str(blind_ni->ni_nid), - HIPQUAD(peer_ip)); - goto failed; - } - continue; - } - - rc = libcfs_sock_read(newsock, &magic, sizeof(magic), - accept_timeout); - if (rc != 0) { - CERROR("Error %d reading connection request from " - "%u.%u.%u.%u\n", rc, HIPQUAD(peer_ip)); - goto failed; - } - - rc = lnet_accept(NULL, newsock, magic); - if (rc != 0) - goto failed; - - continue; - - failed: - libcfs_sock_release(newsock); - } - - libcfs_sock_release(lnet_acceptor_state.pta_sock); - lnet_acceptor_state.pta_sock = NULL; - - if (blind_ni != NULL) - lnet_ni_decref(blind_ni); - - LCONSOLE(0,"Acceptor stopping\n"); - - /* unblock lnet_acceptor_stop() */ - mutex_up(&lnet_acceptor_state.pta_signal); - return 0; -} - -int -lnet_acceptor_start(void) -{ - long pid; - long secure; - - LASSERT (lnet_acceptor_state.pta_sock == NULL); - init_mutex_locked(&lnet_acceptor_state.pta_signal); - - if (!strcmp(accept, "secure")) { - secure = 1; - } else if (!strcmp(accept, "all")) { - secure = 0; - } else if (!strcmp(accept, 
"none")) { - return 0; - } else { - LCONSOLE_ERROR ("Can't parse 'accept=\"%s\"'\n", - accept); - return -EINVAL; - } - - if (lnet_count_acceptor_nis(NULL) == 0) /* not required */ - return 0; - - pid = cfs_kernel_thread(lnet_acceptor, (void *)secure, 0); - if (pid < 0) { - CERROR("Can't start acceptor thread: %ld\n", pid); - return -ESRCH; - } - - mutex_down(&lnet_acceptor_state.pta_signal); /* wait for acceptor to startup */ - - if (lnet_acceptor_state.pta_shutdown == 0) { - /* started OK */ - LASSERT (lnet_acceptor_state.pta_sock != NULL); - return 0; - } - - LASSERT (lnet_acceptor_state.pta_sock == NULL); - return -ENETDOWN; -} - -void -lnet_acceptor_stop(void) -{ - if (lnet_acceptor_state.pta_sock == NULL) /* not running */ - return; - - lnet_acceptor_state.pta_shutdown = 1; - libcfs_sock_abort_accept(lnet_acceptor_state.pta_sock); - - /* block until acceptor signals exit */ - mutex_down(&lnet_acceptor_state.pta_signal); -} - -#else /* __KERNEL__ */ - -int -lnet_acceptor_start(void) -{ - return 0; -} - -void -lnet_acceptor_stop(void) -{ -} - -#endif /* !__KERNEL__ */ diff --git a/lnet/lnet/api-errno.c b/lnet/lnet/api-errno.c deleted file mode 100644 index a158d6ea7e4186e8007de5b333b27cb22997b762..0000000000000000000000000000000000000000 --- a/lnet/lnet/api-errno.c +++ /dev/null @@ -1,11 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-errno.c - * Instantiate the string table of errors - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * This file is not subject to copyright protection. 
- */ - -/* If you change these, you must update the number table in portals/errno.h */ diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c deleted file mode 100644 index a071d39bfd2a628988f751f848f7418bdc0552dc..0000000000000000000000000000000000000000 --- a/lnet/lnet/api-ni.c +++ /dev/null @@ -1,1731 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -#ifdef __KERNEL__ -#define D_LNI D_CONSOLE -#else -#define D_LNI D_CONFIG -#endif - -lnet_t the_lnet; /* THE state of the network */ - -#ifdef __KERNEL__ - -static char *ip2nets = ""; -CFS_MODULE_PARM(ip2nets, "s", charp, 0444, - "LNET network <- IP table"); - -static char *networks = ""; -CFS_MODULE_PARM(networks, "s", charp, 0444, - "local networks"); - -static char *routes = ""; -CFS_MODULE_PARM(routes, "s", charp, 0444, - "routes to non-local networks"); - -static char *portals_compatibility = "none"; -CFS_MODULE_PARM(portals_compatibility, "s", charp, 0444, - "wire protocol compatibility: 'strong'|'weak'|'none'"); - -char * -lnet_get_routes(void) -{ - return routes; -} - -char * -lnet_get_networks(void) -{ - char *nets; - int rc; - - if (*networks != 0 && *ip2nets != 0) { - LCONSOLE_ERROR("Please specify EITHER 'networks' or 'ip2nets'" - " but not both at once\n"); - return NULL; - } - - if (*ip2nets != 0) { - rc = lnet_parse_ip2nets(&nets, ip2nets); - return (rc == 0) ? nets : NULL; - } - - if (*networks != 0) - return networks; - - return "tcp"; -} - -int -lnet_get_portals_compatibility(void) -{ - if (!strcmp(portals_compatibility, "none")) { - return 0; - } - - if (!strcmp(portals_compatibility, "weak")) { - return 1; - LCONSOLE_WARN("Starting in weak portals-compatible mode\n"); - } - - if (!strcmp(portals_compatibility, "strong")) { - return 2; - LCONSOLE_WARN("Starting in strong portals-compatible mode\n"); - } - - LCONSOLE_ERROR("portals_compatibility=\"%s\" not supported\n", - portals_compatibility); - return -EINVAL; -} - -void -lnet_init_locks(void) -{ - spin_lock_init (&the_lnet.ln_lock); - cfs_waitq_init (&the_lnet.ln_waitq); - init_mutex(&the_lnet.ln_lnd_mutex); - init_mutex(&the_lnet.ln_api_mutex); -} - -void -lnet_fini_locks(void) -{ -} - -#else - -char * -lnet_get_routes(void) -{ - char *str = getenv("LNET_ROUTES"); - - return (str == NULL) ? 
"" : str; -} - -char * -lnet_get_networks (void) -{ - static char default_networks[256]; - char *networks = getenv ("LNET_NETWORKS"); - char *ip2nets = getenv ("LNET_IP2NETS"); - char *str; - char *sep; - int len; - int nob; - int rc; - struct list_head *tmp; - -#ifdef NOT_YET - if (networks != NULL && ip2nets != NULL) { - LCONSOLE_ERROR("Please set EITHER 'LNET_NETWORKS' or " - "'LNET_IP2NETS' but not both at once\n"); - return NULL; - } - - if (ip2nets != NULL) { - rc = lnet_parse_ip2nets(&networks, ip2nets); - return (rc == 0) ? networks : NULL; - } -#else - ip2nets = NULL; - rc = 0; -#endif - if (networks != NULL) - return networks; - - /* In userland, the default 'networks=' is the list of known net types */ - - len = sizeof(default_networks); - str = default_networks; - *str = 0; - sep = ""; - - list_for_each (tmp, &the_lnet.ln_lnds) { - lnd_t *lnd = list_entry(tmp, lnd_t, lnd_list); - - nob = snprintf(str, len, "%s%s", sep, - libcfs_lnd2str(lnd->lnd_type)); - len -= nob; - if (len < 0) { - /* overflowed the string; leave it where it was */ - *str = 0; - break; - } - - str += nob; - sep = ","; - } - - return default_networks; -} - -int -lnet_get_portals_compatibility(void) -{ - return 0; -} - -# ifndef HAVE_LIBPTHREAD - -void lnet_init_locks(void) -{ - the_lnet.ln_lock = 0; - the_lnet.ln_lnd_mutex = 0; - the_lnet.ln_api_mutex = 0; -} - -void lnet_fini_locks(void) -{ - LASSERT (the_lnet.ln_api_mutex == 0); - LASSERT (the_lnet.ln_lnd_mutex == 0); - LASSERT (the_lnet.ln_lock == 0); -} - -# else - -void lnet_init_locks(void) -{ - pthread_cond_init(&the_lnet.ln_cond, NULL); - pthread_mutex_init(&the_lnet.ln_lock, NULL); - pthread_mutex_init(&the_lnet.ln_lnd_mutex, NULL); - pthread_mutex_init(&the_lnet.ln_api_mutex, NULL); -} - -void lnet_fini_locks(void) -{ - pthread_mutex_destroy(&the_lnet.ln_api_mutex); - pthread_mutex_destroy(&the_lnet.ln_lnd_mutex); - pthread_mutex_destroy(&the_lnet.ln_lock); - pthread_cond_destroy(&the_lnet.ln_cond); -} - -# endif -#endif - 
-void lnet_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux robert.bartonsoftware.com 2.6.8-1.521 - * #1 Mon Aug 16 09:01:18 EDT 2004 i686 athlon i386 GNU/Linux - * with gcc version 3.3.3 20040412 (Red Hat Linux 3.3.3-7) */ - - /* Constants... */ - CLASSERT (LNET_PROTO_TCP_MAGIC == 0xeebc0ded); - CLASSERT (LNET_PROTO_TCP_VERSION_MAJOR == 1); - CLASSERT (LNET_PROTO_TCP_VERSION_MINOR == 0); - CLASSERT (LNET_MSG_ACK == 0); - CLASSERT (LNET_MSG_PUT == 1); - CLASSERT (LNET_MSG_GET == 2); - CLASSERT (LNET_MSG_REPLY == 3); - CLASSERT (LNET_MSG_HELLO == 4); - - /* Checks for struct ptl_handle_wire_t */ - CLASSERT ((int)sizeof(lnet_handle_wire_t) == 16); - CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_interface_cookie) == 0); - CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_interface_cookie) == 8); - CLASSERT ((int)offsetof(lnet_handle_wire_t, wh_object_cookie) == 8); - CLASSERT ((int)sizeof(((lnet_handle_wire_t *)0)->wh_object_cookie) == 8); - - /* Checks for struct lnet_magicversion_t */ - CLASSERT ((int)sizeof(lnet_magicversion_t) == 8); - CLASSERT ((int)offsetof(lnet_magicversion_t, magic) == 0); - CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->magic) == 4); - CLASSERT ((int)offsetof(lnet_magicversion_t, version_major) == 4); - CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_major) == 2); - CLASSERT ((int)offsetof(lnet_magicversion_t, version_minor) == 6); - CLASSERT ((int)sizeof(((lnet_magicversion_t *)0)->version_minor) == 2); - - /* Checks for struct lnet_hdr_t */ - CLASSERT ((int)sizeof(lnet_hdr_t) == 72); - CLASSERT ((int)offsetof(lnet_hdr_t, dest_nid) == 0); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_nid) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, src_nid) == 8); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_nid) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, dest_pid) == 16); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->dest_pid) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, src_pid) 
== 20); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->src_pid) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, type) == 24); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->type) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, payload_length) == 28); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->payload_length) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, msg) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg) == 40); - - /* Ack */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.dst_wmd) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.dst_wmd) == 16); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.match_bits) == 48); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.match_bits) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.ack.mlength) == 56); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.ack.mlength) == 4); - - /* Put */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ack_wmd) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ack_wmd) == 16); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.match_bits) == 48); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.match_bits) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.hdr_data) == 56); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.hdr_data) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.ptl_index) == 64); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.ptl_index) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.put.offset) == 68); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.put.offset) == 4); - - /* Get */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.return_wmd) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.return_wmd) == 16); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.match_bits) == 48); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.match_bits) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.ptl_index) == 56); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.ptl_index) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.src_offset) == 60); - CLASSERT 
((int)sizeof(((lnet_hdr_t *)0)->msg.get.src_offset) == 4); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.get.sink_length) == 64); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.get.sink_length) == 4); - - /* Reply */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.reply.dst_wmd) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.reply.dst_wmd) == 16); - - /* Hello */ - CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.incarnation) == 32); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.incarnation) == 8); - CLASSERT ((int)offsetof(lnet_hdr_t, msg.hello.type) == 40); - CLASSERT ((int)sizeof(((lnet_hdr_t *)0)->msg.hello.type) == 4); -} - -lnd_t * -lnet_find_lnd_by_type (int type) -{ - lnd_t *lnd; - struct list_head *tmp; - - /* holding lnd mutex */ - list_for_each (tmp, &the_lnet.ln_lnds) { - lnd = list_entry(tmp, lnd_t, lnd_list); - - if (lnd->lnd_type == type) - return lnd; - } - - return NULL; -} - -void -lnet_register_lnd (lnd_t *lnd) -{ - LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex); - - LASSERT (the_lnet.ln_init); - LASSERT (libcfs_isknown_lnd(lnd->lnd_type)); - LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == NULL); - - list_add_tail (&lnd->lnd_list, &the_lnet.ln_lnds); - lnd->lnd_refcount = 0; - - CDEBUG(D_NET, "%s LND registered\n", libcfs_lnd2str(lnd->lnd_type)); - - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); -} - -void -lnet_unregister_lnd (lnd_t *lnd) -{ - LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex); - - LASSERT (the_lnet.ln_init); - LASSERT (lnet_find_lnd_by_type(lnd->lnd_type) == lnd); - LASSERT (lnd->lnd_refcount == 0); - - list_del (&lnd->lnd_list); - CDEBUG(D_NET, "%s LND unregistered\n", libcfs_lnd2str(lnd->lnd_type)); - - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); -} - -#ifndef LNET_USE_LIB_FREELIST - -int -lnet_descriptor_setup (void) -{ - return 0; -} - -void -lnet_descriptor_cleanup (void) -{ -} - -#else - -int -lnet_freelist_init (lnet_freelist_t *fl, int n, int size) -{ - char *space; - - LASSERT (n > 0); - - size += offsetof (lnet_freeobj_t, fo_contents); - - 
LIBCFS_ALLOC(space, n * size); - if (space == NULL) - return (-ENOMEM); - - CFS_INIT_LIST_HEAD (&fl->fl_list); - fl->fl_objs = space; - fl->fl_nobjs = n; - fl->fl_objsize = size; - - do - { - memset (space, 0, size); - list_add ((struct list_head *)space, &fl->fl_list); - space += size; - } while (--n != 0); - - return (0); -} - -void -lnet_freelist_fini (lnet_freelist_t *fl) -{ - struct list_head *el; - int count; - - if (fl->fl_nobjs == 0) - return; - - count = 0; - for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next) - count++; - - LASSERT (count == fl->fl_nobjs); - - LIBCFS_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); - memset (fl, 0, sizeof (fl)); -} - -int -lnet_descriptor_setup (void) -{ - /* NB on failure caller must still call lnet_descriptor_cleanup */ - /* ****** */ - int rc; - - memset (&the_lnet.ln_free_mes, 0, sizeof (the_lnet.ln_free_mes)); - memset (&the_lnet.ln_free_msgs, 0, sizeof (the_lnet.ln_free_msgs)); - memset (&the_lnet.ln_free_mds, 0, sizeof (the_lnet.ln_free_mds)); - memset (&the_lnet.ln_free_eqs, 0, sizeof (the_lnet.ln_free_eqs)); - - rc = lnet_freelist_init(&the_lnet.ln_free_mes, - MAX_MES, sizeof (lnet_me_t)); - if (rc != 0) - return (rc); - - rc = lnet_freelist_init(&the_lnet.ln_free_msgs, - MAX_MSGS, sizeof (lnet_msg_t)); - if (rc != 0) - return (rc); - - rc = lnet_freelist_init(&the_lnet.ln_free_mds, - MAX_MDS, sizeof (lnet_libmd_t)); - if (rc != 0) - return (rc); - - rc = lnet_freelist_init(&the_lnet.ln_free_eqs, - MAX_EQS, sizeof (lnet_eq_t)); - return (rc); -} - -void -lnet_descriptor_cleanup (void) -{ - lnet_freelist_fini (&the_lnet.ln_free_mes); - lnet_freelist_fini (&the_lnet.ln_free_msgs); - lnet_freelist_fini (&the_lnet.ln_free_mds); - lnet_freelist_fini (&the_lnet.ln_free_eqs); -} - -#endif - -__u64 -lnet_create_interface_cookie (void) -{ - /* NB the interface cookie in wire handles guards against delayed - * replies and ACKs appearing valid after reboot. 
Initialisation time, - * even if it's only implemented to millisecond resolution is probably - * easily good enough. */ - struct timeval tv; - __u64 cookie; -#ifndef __KERNEL__ - int rc = gettimeofday (&tv, NULL); - LASSERT (rc == 0); -#else - do_gettimeofday(&tv); -#endif - cookie = tv.tv_sec; - cookie *= 1000000; - cookie += tv.tv_usec; - return cookie; -} - -int -lnet_setup_handle_hash (void) -{ - int i; - - /* Arbitrary choice of hash table size */ -#ifdef __KERNEL__ - the_lnet.ln_lh_hash_size = CFS_PAGE_SIZE / sizeof (struct list_head); -#else - the_lnet.ln_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4; -#endif - LIBCFS_ALLOC(the_lnet.ln_lh_hash_table, - the_lnet.ln_lh_hash_size * sizeof (struct list_head)); - if (the_lnet.ln_lh_hash_table == NULL) - return (-ENOMEM); - - for (i = 0; i < the_lnet.ln_lh_hash_size; i++) - CFS_INIT_LIST_HEAD (&the_lnet.ln_lh_hash_table[i]); - - the_lnet.ln_next_object_cookie = LNET_COOKIE_TYPES; - - return (0); -} - -void -lnet_cleanup_handle_hash (void) -{ - if (the_lnet.ln_lh_hash_table == NULL) - return; - - LIBCFS_FREE(the_lnet.ln_lh_hash_table, - the_lnet.ln_lh_hash_size * sizeof (struct list_head)); -} - -lnet_libhandle_t * -lnet_lookup_cookie (__u64 cookie, int type) -{ - /* ALWAYS called with LNET_LOCK held */ - struct list_head *list; - struct list_head *el; - unsigned int hash; - - if ((cookie & (LNET_COOKIE_TYPES - 1)) != type) - return (NULL); - - hash = ((unsigned int)cookie) % the_lnet.ln_lh_hash_size; - list = &the_lnet.ln_lh_hash_table[hash]; - - list_for_each (el, list) { - lnet_libhandle_t *lh = list_entry (el, lnet_libhandle_t, - lh_hash_chain); - - if (lh->lh_cookie == cookie) - return (lh); - } - - return (NULL); -} - -void -lnet_initialise_handle (lnet_libhandle_t *lh, int type) -{ - /* ALWAYS called with LNET_LOCK held */ - unsigned int hash; - - LASSERT (type >= 0 && type < LNET_COOKIE_TYPES); - lh->lh_cookie = the_lnet.ln_next_object_cookie | type; - the_lnet.ln_next_object_cookie += LNET_COOKIE_TYPES; - 
- hash = ((unsigned int)lh->lh_cookie) % the_lnet.ln_lh_hash_size; - list_add (&lh->lh_hash_chain, &the_lnet.ln_lh_hash_table[hash]); -} - -void -lnet_invalidate_handle (lnet_libhandle_t *lh) -{ - /* ALWAYS called with LNET_LOCK held */ - list_del (&lh->lh_hash_chain); -} - -int -lnet_init_finalizers(void) -{ -#ifdef __KERNEL__ - int i; - - the_lnet.ln_nfinalizers = num_online_cpus(); - - LIBCFS_ALLOC(the_lnet.ln_finalizers, - the_lnet.ln_nfinalizers * - sizeof(*the_lnet.ln_finalizers)); - if (the_lnet.ln_finalizers == NULL) { - CERROR("Can't allocate ln_finalizers\n"); - return -ENOMEM; - } - - for (i = 0; i < the_lnet.ln_nfinalizers; i++) - the_lnet.ln_finalizers[i] = NULL; -#else - the_lnet.ln_finalizing = 0; -#endif - - CFS_INIT_LIST_HEAD(&the_lnet.ln_finalizeq); - return 0; -} - -void -lnet_fini_finalizers(void) -{ -#ifdef __KERNEL__ - int i; - - for (i = 0; i < the_lnet.ln_nfinalizers; i++) - LASSERT (the_lnet.ln_finalizers[i] == NULL); - - LIBCFS_FREE(the_lnet.ln_finalizers, - the_lnet.ln_nfinalizers * - sizeof(*the_lnet.ln_finalizers)); -#else - LASSERT (!the_lnet.ln_finalizing); -#endif - LASSERT (list_empty(&the_lnet.ln_finalizeq)); -} - -int -lnet_prepare(lnet_pid_t requested_pid) -{ - /* Prepare to bring up the network */ - int rc = 0; - int i; - - LASSERT (the_lnet.ln_refcount == 0); - - the_lnet.ln_routing = 0; - -#ifdef __KERNEL__ - LASSERT ((requested_pid & LNET_PID_USERFLAG) == 0); - the_lnet.ln_pid = requested_pid; -#else - /* My PID must be unique on this node and flag I'm userspace */ - the_lnet.ln_pid = getpid() | LNET_PID_USERFLAG; -#endif - - rc = lnet_descriptor_setup(); - if (rc != 0) - goto failed0; - - memset(&the_lnet.ln_counters, 0, - sizeof(the_lnet.ln_counters)); - - CFS_INIT_LIST_HEAD (&the_lnet.ln_active_msgs); - CFS_INIT_LIST_HEAD (&the_lnet.ln_active_mds); - CFS_INIT_LIST_HEAD (&the_lnet.ln_active_eqs); - CFS_INIT_LIST_HEAD (&the_lnet.ln_test_peers); - CFS_INIT_LIST_HEAD (&the_lnet.ln_nis); - CFS_INIT_LIST_HEAD 
(&the_lnet.ln_zombie_nis); - CFS_INIT_LIST_HEAD (&the_lnet.ln_remote_nets); - CFS_INIT_LIST_HEAD (&the_lnet.ln_routers); - - the_lnet.ln_interface_cookie = lnet_create_interface_cookie(); - - lnet_init_rtrpools(); - - rc = lnet_setup_handle_hash (); - if (rc != 0) - goto failed0; - - rc = lnet_create_peer_table(); - if (rc != 0) - goto failed1; - - rc = lnet_init_finalizers(); - if (rc != 0) - goto failed2; - - the_lnet.ln_nportals = MAX_PORTALS; - LIBCFS_ALLOC(the_lnet.ln_portals, - the_lnet.ln_nportals * - sizeof(*the_lnet.ln_portals)); - if (the_lnet.ln_portals == NULL) { - rc = -ENOMEM; - goto failed3; - } - - for (i = 0; i < the_lnet.ln_nportals; i++) { - CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_ml)); - CFS_INIT_LIST_HEAD(&(the_lnet.ln_portals[i].ptl_msgq)); - the_lnet.ln_portals[i].ptl_options = 0; - } - - return 0; - - failed3: - lnet_fini_finalizers(); - failed2: - lnet_destroy_peer_table(); - failed1: - lnet_cleanup_handle_hash(); - failed0: - lnet_descriptor_cleanup(); - return rc; -} - -int -lnet_unprepare (void) -{ - int idx; - - /* NB no LNET_LOCK since this is the last reference. 
All LND instances - * have shut down already, so it is safe to unlink and free all - * descriptors, even those that appear committed to a network op (eg MD - * with non-zero pending count) */ - - lnet_fail_nid(LNET_NID_ANY, 0); - - LASSERT (list_empty(&the_lnet.ln_test_peers)); - LASSERT (the_lnet.ln_refcount == 0); - LASSERT (list_empty(&the_lnet.ln_nis)); - LASSERT (list_empty(&the_lnet.ln_zombie_nis)); - LASSERT (the_lnet.ln_nzombie_nis == 0); - - for (idx = 0; idx < the_lnet.ln_nportals; idx++) { - - LNetClearLazyPortal(idx); - LASSERT (list_empty(&the_lnet.ln_portals[idx].ptl_msgq)); - - while (!list_empty (&the_lnet.ln_portals[idx].ptl_ml)) { - lnet_me_t *me = list_entry (the_lnet.ln_portals[idx].ptl_ml.next, - lnet_me_t, me_list); - - CERROR ("Active me %p on exit\n", me); - list_del (&me->me_list); - lnet_me_free (me); - } - } - - while (!list_empty (&the_lnet.ln_active_mds)) { - lnet_libmd_t *md = list_entry (the_lnet.ln_active_mds.next, - lnet_libmd_t, md_list); - - CERROR ("Active md %p on exit\n", md); - list_del (&md->md_list); - lnet_md_free (md); - } - - while (!list_empty (&the_lnet.ln_active_eqs)) { - lnet_eq_t *eq = list_entry (the_lnet.ln_active_eqs.next, - lnet_eq_t, eq_list); - - CERROR ("Active eq %p on exit\n", eq); - list_del (&eq->eq_list); - lnet_eq_free (eq); - } - - while (!list_empty (&the_lnet.ln_active_msgs)) { - lnet_msg_t *msg = list_entry (the_lnet.ln_active_msgs.next, - lnet_msg_t, msg_activelist); - - CERROR ("Active msg %p on exit\n", msg); - LASSERT (msg->msg_onactivelist); - msg->msg_onactivelist = 0; - list_del (&msg->msg_activelist); - lnet_msg_free (msg); - } - - LIBCFS_FREE(the_lnet.ln_portals, - the_lnet.ln_nportals * sizeof(*the_lnet.ln_portals)); - - lnet_free_rtrpools(); - lnet_fini_finalizers(); - lnet_destroy_peer_table(); - lnet_cleanup_handle_hash(); - lnet_descriptor_cleanup(); - - return (0); -} - -lnet_ni_t * -lnet_net2ni_locked (__u32 net) -{ - struct list_head *tmp; - lnet_ni_t *ni; - - list_for_each (tmp, 
&the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (lnet_ptlcompat_matchnet(LNET_NIDNET(ni->ni_nid), net)) { - lnet_ni_addref_locked(ni); - return ni; - } - } - - return NULL; -} - -int -lnet_islocalnet (__u32 net) -{ - lnet_ni_t *ni; - - LNET_LOCK(); - ni = lnet_net2ni_locked(net); - if (ni != NULL) - lnet_ni_decref_locked(ni); - LNET_UNLOCK(); - - return ni != NULL; -} - -lnet_ni_t * -lnet_nid2ni_locked (lnet_nid_t nid) -{ - struct list_head *tmp; - lnet_ni_t *ni; - - list_for_each (tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (lnet_ptlcompat_matchnid(ni->ni_nid, nid)) { - lnet_ni_addref_locked(ni); - return ni; - } - } - - return NULL; -} - -int -lnet_islocalnid (lnet_nid_t nid) -{ - lnet_ni_t *ni; - - LNET_LOCK(); - ni = lnet_nid2ni_locked(nid); - if (ni != NULL) - lnet_ni_decref_locked(ni); - LNET_UNLOCK(); - - return ni != NULL; -} - -int -lnet_count_acceptor_nis (lnet_ni_t **first_ni) -{ - /* Return the # of NIs that need the acceptor. Return the first one in - * *first_ni so the acceptor can pass it connections "blind" to retain - * binary compatibility. 
*/ - int count = 0; -#ifdef __KERNEL__ - struct list_head *tmp; - lnet_ni_t *ni; - - LNET_LOCK(); - list_for_each (tmp, &the_lnet.ln_nis) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (ni->ni_lnd->lnd_accept != NULL) { - /* This LND uses the acceptor */ - if (count == 0 && first_ni != NULL) { - lnet_ni_addref_locked(ni); - *first_ni = ni; - } - count++; - } - } - - LNET_UNLOCK(); -#endif - return count; -} - -void -lnet_shutdown_lndnis (void) -{ - int i; - int islo; - lnet_ni_t *ni; - - /* NB called holding the global mutex */ - - /* All quiet on the API front */ - LASSERT (!the_lnet.ln_shutdown); - LASSERT (the_lnet.ln_refcount == 0); - LASSERT (list_empty(&the_lnet.ln_zombie_nis)); - LASSERT (the_lnet.ln_nzombie_nis == 0); - LASSERT (list_empty(&the_lnet.ln_remote_nets)); - - LNET_LOCK(); - the_lnet.ln_shutdown = 1; /* flag shutdown */ - - /* Unlink NIs from the global table */ - while (!list_empty(&the_lnet.ln_nis)) { - ni = list_entry(the_lnet.ln_nis.next, - lnet_ni_t, ni_list); - list_del (&ni->ni_list); - - the_lnet.ln_nzombie_nis++; - lnet_ni_decref_locked(ni); /* drop apini's ref */ - } - - /* Drop the cached eqwait NI. */ - if (the_lnet.ln_eqwaitni != NULL) { - lnet_ni_decref_locked(the_lnet.ln_eqwaitni); - the_lnet.ln_eqwaitni = NULL; - } - - /* Drop the cached loopback NI. 
*/ - if (the_lnet.ln_loni != NULL) { - lnet_ni_decref_locked(the_lnet.ln_loni); - the_lnet.ln_loni = NULL; - } - - LNET_UNLOCK(); - /* Clear the peer table and wait for all peers to go (they hold refs on - * their NIs) */ - - lnet_clear_peer_table(); - - LNET_LOCK(); - /* Now wait for the NI's I just nuked to show up on apini_zombie_nis - * and shut them down in guaranteed thread context */ - i = 2; - while (the_lnet.ln_nzombie_nis != 0) { - - while (list_empty(&the_lnet.ln_zombie_nis)) { - LNET_UNLOCK(); - ++i; - if ((i & (-i)) == i) - CDEBUG(D_WARNING,"Waiting for %d zombie NIs\n", - the_lnet.ln_nzombie_nis); - cfs_pause(cfs_time_seconds(1)); - LNET_LOCK(); - } - - ni = list_entry(the_lnet.ln_zombie_nis.next, - lnet_ni_t, ni_list); - list_del(&ni->ni_list); - ni->ni_lnd->lnd_refcount--; - - LNET_UNLOCK(); - - islo = ni->ni_lnd->lnd_type == LOLND; - - LASSERT (!in_interrupt ()); - (ni->ni_lnd->lnd_shutdown)(ni); - - /* can't deref lnd anymore now; it might have unregistered - * itself... 
*/ - - if (!islo) - CDEBUG(D_LNI, "Removed LNI %s\n", - libcfs_nid2str(ni->ni_nid)); - - LIBCFS_FREE(ni, sizeof(*ni)); - - LNET_LOCK(); - the_lnet.ln_nzombie_nis--; - } - - the_lnet.ln_shutdown = 0; - LNET_UNLOCK(); - - if (the_lnet.ln_network_tokens != NULL) { - LIBCFS_FREE(the_lnet.ln_network_tokens, - the_lnet.ln_network_tokens_nob); - the_lnet.ln_network_tokens = NULL; - } -} - -int -lnet_startup_lndnis (void) -{ - lnd_t *lnd; - lnet_ni_t *ni; - struct list_head nilist; - int rc = 0; - int lnd_type; - int nicount = 0; - char *nets = lnet_get_networks(); - - INIT_LIST_HEAD(&nilist); - - if (nets == NULL) - goto failed; - - rc = lnet_parse_networks(&nilist, nets); - if (rc != 0) - goto failed; - - while (!list_empty(&nilist)) { - ni = list_entry(nilist.next, lnet_ni_t, ni_list); - lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid)); - - LASSERT (libcfs_isknown_lnd(lnd_type)); - - LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex); - lnd = lnet_find_lnd_by_type(lnd_type); - -#ifdef __KERNEL__ - if (lnd == NULL) { - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); - rc = request_module(libcfs_lnd2modname(lnd_type)); - LNET_MUTEX_DOWN(&the_lnet.ln_lnd_mutex); - - lnd = lnet_find_lnd_by_type(lnd_type); - if (lnd == NULL) { - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); - CERROR("Can't load LND %s, module %s, rc=%d\n", - libcfs_lnd2str(lnd_type), - libcfs_lnd2modname(lnd_type), rc); -#ifndef CONFIG_KMOD - LCONSOLE_ERROR("Your kernel must be compiled " - "with CONFIG_KMOD set for " - "automatic module loading."); -#endif - goto failed; - } - } -#else - if (lnd == NULL) { - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); - CERROR("LND %s not supported\n", - libcfs_lnd2str(lnd_type)); - goto failed; - } -#endif - - ni->ni_refcount = 1; - - LNET_LOCK(); - lnd->lnd_refcount++; - LNET_UNLOCK(); - - ni->ni_lnd = lnd; - - rc = (lnd->lnd_startup)(ni); - - LNET_MUTEX_UP(&the_lnet.ln_lnd_mutex); - - if (rc != 0) { - LCONSOLE_ERROR("Error %d starting up LNI %s\n", - rc, libcfs_lnd2str(lnd->lnd_type)); - LNET_LOCK(); - 
lnd->lnd_refcount--; - LNET_UNLOCK(); - goto failed; - } - - list_del(&ni->ni_list); - - LNET_LOCK(); - list_add_tail(&ni->ni_list, &the_lnet.ln_nis); - LNET_UNLOCK(); - - if (lnd->lnd_type == LOLND) { - lnet_ni_addref(ni); - LASSERT (the_lnet.ln_loni == NULL); - the_lnet.ln_loni = ni; - continue; - } - -#ifndef __KERNEL__ - if (lnd->lnd_wait != NULL) { - if (the_lnet.ln_eqwaitni == NULL) { - lnet_ni_addref(ni); - the_lnet.ln_eqwaitni = ni; - } - } else { -# ifndef HAVE_LIBPTHREAD - LCONSOLE_ERROR("LND %s not supported in a " - "single-threaded runtime\n", - libcfs_lnd2str(lnd_type)); - goto failed; -# endif - } -#endif - if (ni->ni_peertxcredits == 0 || - ni->ni_maxtxcredits == 0) { - LCONSOLE_ERROR("LNI %s has no %scredits\n", - libcfs_lnd2str(lnd->lnd_type), - ni->ni_peertxcredits == 0 ? - "" : "per-peer "); - goto failed; - } - - ni->ni_txcredits = ni->ni_mintxcredits = ni->ni_maxtxcredits; - - CDEBUG(D_LNI, "Added LNI %s [%d/%d]\n", - libcfs_nid2str(ni->ni_nid), - ni->ni_peertxcredits, ni->ni_txcredits); - - /* Handle nidstrings for network 0 just like this one */ - if (the_lnet.ln_ptlcompat > 0) { - if (nicount > 0) { - LCONSOLE_ERROR("Can't run > 1 network when " - "portals_compatibility is set\n"); - goto failed; - } - libcfs_setnet0alias(lnd->lnd_type); - } - - nicount++; - } - - if (the_lnet.ln_eqwaitni != NULL && nicount > 1) { - lnd_type = the_lnet.ln_eqwaitni->ni_lnd->lnd_type; - LCONSOLE_ERROR("LND %s can only run single-network\n", - libcfs_lnd2str(lnd_type)); - goto failed; - } - - return 0; - - failed: - lnet_shutdown_lndnis(); - - while (!list_empty(&nilist)) { - ni = list_entry(nilist.next, lnet_ni_t, ni_list); - list_del(&ni->ni_list); - LIBCFS_FREE(ni, sizeof(*ni)); - } - - return -ENETDOWN; -} - -int -LNetInit(void) -{ - int rc; - - lnet_assert_wire_constants (); - LASSERT (!the_lnet.ln_init); - - memset(&the_lnet, 0, sizeof(the_lnet)); - - rc = lnet_get_portals_compatibility(); - if (rc < 0) - return rc; - - lnet_init_locks(); - 
CFS_INIT_LIST_HEAD(&the_lnet.ln_lnds); - the_lnet.ln_ptlcompat = rc; - the_lnet.ln_refcount = 0; - the_lnet.ln_init = 1; - -#ifdef __KERNEL__ - /* All LNDs apart from the LOLND are in separate modules. They - * register themselves when their module loads, and unregister - * themselves when their module is unloaded. */ -#else - /* Register LNDs - * NB the order here determines default 'networks=' order */ -# ifdef CRAY_XT3 - LNET_REGISTER_ULND(the_ptllnd); -# endif -# ifdef HAVE_LIBPTHREAD - LNET_REGISTER_ULND(the_tcplnd); -# endif -#endif - lnet_register_lnd(&the_lolnd); - return 0; -} - -void -LNetFini(void) -{ - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount == 0); - - while (!list_empty(&the_lnet.ln_lnds)) - lnet_unregister_lnd(list_entry(the_lnet.ln_lnds.next, - lnd_t, lnd_list)); - lnet_fini_locks(); - - the_lnet.ln_init = 0; -} - -int -LNetNIInit(lnet_pid_t requested_pid) -{ - int im_a_router = 0; - int rc; - - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - - LASSERT (the_lnet.ln_init); - CDEBUG(D_OTHER, "refs %d\n", the_lnet.ln_refcount); - - if (the_lnet.ln_refcount > 0) { - rc = the_lnet.ln_refcount++; - goto out; - } - - if (requested_pid == LNET_PID_ANY) { - /* Don't instantiate LNET just for me */ - rc = -ENETDOWN; - goto failed0; - } - - rc = lnet_prepare(requested_pid); - if (rc != 0) - goto failed0; - - rc = lnet_startup_lndnis(); - if (rc != 0) - goto failed1; - - rc = lnet_parse_routes(lnet_get_routes(), &im_a_router); - if (rc != 0) - goto failed2; - - rc = lnet_check_routes(); - if (rc != 0) - goto failed2; - - rc = lnet_alloc_rtrpools(im_a_router); - if (rc != 0) - goto failed2; - - rc = lnet_acceptor_start(); - if (rc != 0) - goto failed2; - - the_lnet.ln_refcount = 1; - /* Now I may use my own API functions... 
*/ - - rc = lnet_router_checker_start(); - if (rc != 0) - goto failed3; - - rc = lnet_ping_target_init(); - if (rc != 0) - goto failed4; - - lnet_proc_init(); - goto out; - - failed4: - lnet_router_checker_stop(); - failed3: - the_lnet.ln_refcount = 0; - lnet_acceptor_stop(); - failed2: - lnet_destroy_routes(); - lnet_shutdown_lndnis(); - failed1: - lnet_unprepare(); - failed0: - LASSERT (rc < 0); - out: - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - return rc; -} - -int -LNetNIFini() -{ - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (the_lnet.ln_refcount != 1) { - the_lnet.ln_refcount--; - } else { - LASSERT (!the_lnet.ln_niinit_self); - - lnet_proc_fini(); - lnet_ping_target_fini(); - lnet_router_checker_stop(); - - /* Teardown fns that use my own API functions BEFORE here */ - the_lnet.ln_refcount = 0; - - lnet_acceptor_stop(); - lnet_destroy_routes(); - lnet_shutdown_lndnis(); - lnet_unprepare(); - } - - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - return 0; -} - -int -LNetCtl(unsigned int cmd, void *arg) -{ - struct libcfs_ioctl_data *data = arg; - lnet_process_id_t id; - lnet_ni_t *ni; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - switch (cmd) { - case IOC_LIBCFS_GET_NI: - rc = LNetGetId(data->ioc_count, &id); - data->ioc_nid = id.nid; - return rc; - - case IOC_LIBCFS_FAIL_NID: - return lnet_fail_nid(data->ioc_nid, data->ioc_count); - - case IOC_LIBCFS_ADD_ROUTE: - rc = lnet_add_route(data->ioc_net, data->ioc_count, - data->ioc_nid); - return (rc != 0) ? 
rc : lnet_check_routes(); - - case IOC_LIBCFS_DEL_ROUTE: - return lnet_del_route(data->ioc_net, data->ioc_nid); - - case IOC_LIBCFS_GET_ROUTE: - return lnet_get_route(data->ioc_count, - &data->ioc_net, &data->ioc_count, - &data->ioc_nid, &data->ioc_flags); - case IOC_LIBCFS_NOTIFY_ROUTER: - return lnet_notify(NULL, data->ioc_nid, data->ioc_flags, - (time_t)data->ioc_u64[0]); - - case IOC_LIBCFS_PORTALS_COMPATIBILITY: - return the_lnet.ln_ptlcompat; - - case IOC_LIBCFS_LNET_DIST: - rc = LNetDist(data->ioc_nid, &data->ioc_nid, &data->ioc_u32[1]); - if (rc < 0 && rc != -EHOSTUNREACH) - return rc; - - data->ioc_u32[0] = rc; - return 0; - - case IOC_LIBCFS_TESTPROTOCOMPAT: - LNET_LOCK(); - the_lnet.ln_testprotocompat = data->ioc_flags; - LNET_UNLOCK(); - return 0; - - case IOC_LIBCFS_PING: - rc = lnet_ping((lnet_process_id_t) {.nid = data->ioc_nid, - .pid = data->ioc_u32[0]}, - data->ioc_u32[1], /* timeout */ - (lnet_process_id_t *)data->ioc_pbuf1, - data->ioc_plen1/sizeof(lnet_process_id_t)); - if (rc < 0) - return rc; - data->ioc_count = rc; - return 0; - - case IOC_LIBCFS_DEBUG_PEER: { - /* CAVEAT EMPTOR: this one designed for calling directly; not - * via an ioctl */ - lnet_process_id_t *id = arg; - - lnet_debug_peer(id->nid); - - ni = lnet_net2ni(LNET_NIDNET(id->nid)); - if (ni == NULL) { - CDEBUG(D_WARNING, "No NI for %s\n", libcfs_id2str(*id)); - } else { - if (ni->ni_lnd->lnd_ctl == NULL) { - CDEBUG(D_WARNING, "No ctl for %s\n", - libcfs_id2str(*id)); - } else { - (void)ni->ni_lnd->lnd_ctl(ni, cmd, arg); - } - - lnet_ni_decref(ni); - } - return 0; - } - - default: - ni = lnet_net2ni(data->ioc_net); - if (ni == NULL) - return -EINVAL; - - if (ni->ni_lnd->lnd_ctl == NULL) - rc = -EINVAL; - else - rc = ni->ni_lnd->lnd_ctl(ni, cmd, arg); - - lnet_ni_decref(ni); - return rc; - } - /* not reached */ -} - -int -LNetGetId(unsigned int index, lnet_process_id_t *id) -{ - lnet_ni_t *ni; - struct list_head *tmp; - int rc = -ENOENT; - - LASSERT (the_lnet.ln_init); - LASSERT 
(the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - list_for_each(tmp, &the_lnet.ln_nis) { - if (index-- != 0) - continue; - - ni = list_entry(tmp, lnet_ni_t, ni_list); - - id->nid = ni->ni_nid; - id->pid = the_lnet.ln_pid; - rc = 0; - break; - } - - LNET_UNLOCK(); - - return rc; -} - -void -LNetSnprintHandle(char *str, int len, lnet_handle_any_t h) -{ - snprintf(str, len, LPX64, h.cookie); -} - - -int -lnet_ping_target_init(void) -{ - lnet_handle_me_t meh; - lnet_process_id_t id; - int rc; - int rc2; - int n; - int infosz; - int i; - - for (n = 0; ; n++) { - rc = LNetGetId(n, &id); - if (rc == -ENOENT) - break; - - LASSERT (rc == 0); - } - - infosz = offsetof(lnet_ping_info_t, pi_nid[n]); - LIBCFS_ALLOC(the_lnet.ln_ping_info, infosz); - if (the_lnet.ln_ping_info == NULL) { - CERROR("Can't allocate ping info[%d]\n", n); - return -ENOMEM; - } - - the_lnet.ln_ping_info->pi_magic = LNET_PROTO_PING_MAGIC; - the_lnet.ln_ping_info->pi_version = LNET_PROTO_PING_VERSION; - the_lnet.ln_ping_info->pi_pid = the_lnet.ln_pid; - the_lnet.ln_ping_info->pi_nnids = n; - - for (i = 0; i < n; i++) { - rc = LNetGetId(i, &id); - LASSERT (rc == 0); - the_lnet.ln_ping_info->pi_nid[i] = id.nid; - } - - /* We can have a tiny EQ since we only need to see the unlink event on - * teardown, which by definition is the last one! 
*/ - rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &the_lnet.ln_ping_target_eq); - if (rc != 0) { - CERROR("Can't allocate ping EQ: %d\n", rc); - goto failed_0; - } - - rc = LNetMEAttach(LNET_RESERVED_PORTAL, - (lnet_process_id_t){.nid = LNET_NID_ANY, - .pid = LNET_PID_ANY}, - LNET_PROTO_PING_MATCHBITS, 0LL, - LNET_UNLINK, LNET_INS_AFTER, - &meh); - if (rc != 0) { - CERROR("Can't create ping ME: %d\n", rc); - goto failed_1; - } - - rc = LNetMDAttach(meh, - (lnet_md_t){.start = the_lnet.ln_ping_info, - .length = infosz, - .threshold = LNET_MD_THRESH_INF, - .options = (LNET_MD_OP_GET | - LNET_MD_TRUNCATE | - LNET_MD_MANAGE_REMOTE), - .eq_handle = the_lnet.ln_ping_target_eq}, - LNET_RETAIN, - &the_lnet.ln_ping_target_md); - if (rc != 0) { - CERROR("Can't attach ping MD: %d\n", rc); - goto failed_2; - } - - return 0; - - failed_2: - rc2 = LNetMEUnlink(meh); - LASSERT (rc2 == 0); - failed_1: - rc2 = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT (rc2 == 0); - failed_0: - LIBCFS_FREE(the_lnet.ln_ping_info, infosz); - - return rc; -} - -void -lnet_ping_target_fini(void) -{ - lnet_event_t event; - int rc; - int which; - int timeout_ms = 1000; - cfs_sigset_t blocked = cfs_block_allsigs(); - - LNetMDUnlink(the_lnet.ln_ping_target_md); - /* NB md could be busy; this just starts the unlink */ - - for (;;) { - rc = LNetEQPoll(&the_lnet.ln_ping_target_eq, 1, - timeout_ms, &event, &which); - - /* I expect overflow... 
*/ - LASSERT (rc >= 0 || rc == -EOVERFLOW); - - if (rc == 0) { - /* timed out: provide a diagnostic */ - CWARN("Still waiting for ping MD to unlink\n"); - timeout_ms *= 2; - continue; - } - - /* Got a valid event */ - if (event.unlinked) - break; - } - - rc = LNetEQFree(the_lnet.ln_ping_target_eq); - LASSERT (rc == 0); - - LIBCFS_FREE(the_lnet.ln_ping_info, - offsetof(lnet_ping_info_t, - pi_nid[the_lnet.ln_ping_info->pi_nnids])); - - cfs_restore_sigs(blocked); -} - -int -lnet_ping (lnet_process_id_t id, int timeout_ms, lnet_process_id_t *ids, int n_ids) -{ - lnet_handle_eq_t eqh; - lnet_handle_md_t mdh; - lnet_event_t event; - int which; - int unlinked = 0; - int replied = 0; - const int a_long_time = 60000; /* mS */ - int infosz = offsetof(lnet_ping_info_t, pi_nid[n_ids]); - lnet_ping_info_t *info; - lnet_process_id_t tmpid; - int i; - int nob; - int rc; - int rc2; - cfs_sigset_t blocked; - - if (n_ids <= 0 || - id.nid == LNET_NID_ANY || - timeout_ms > 500000 || /* arbitrary limit! */ - n_ids > 20) /* arbitrary limit! */ - return -EINVAL; - - if (id.pid == LNET_PID_ANY) - id.pid = LUSTRE_SRV_LNET_PID; - - LIBCFS_ALLOC(info, infosz); - if (info == NULL) - return -ENOMEM; - - /* NB 2 events max (including any unlink event) */ - rc = LNetEQAlloc(2, LNET_EQ_HANDLER_NONE, &eqh); - if (rc != 0) { - CERROR("Can't allocate EQ: %d\n", rc); - goto out_0; - } - - rc = LNetMDBind((lnet_md_t){.start = info, - .length = infosz, - .threshold = 2, /* GET/REPLY */ - .options = LNET_MD_TRUNCATE, - .eq_handle = eqh}, - LNET_UNLINK, - &mdh); - if (rc != 0) { - CERROR("Can't bind MD: %d\n", rc); - goto out_1; - } - - rc = LNetGet(LNET_NID_ANY, mdh, id, - LNET_RESERVED_PORTAL, - LNET_PROTO_PING_MATCHBITS, 0); - - if (rc != 0) { - /* Don't CERROR; this could be deliberate! */ - - rc2 = LNetMDUnlink(mdh); - LASSERT (rc2 == 0); - - /* NB must wait for the UNLINK event below... 
*/ - unlinked = 1; - timeout_ms = a_long_time; - } - - do { - /* MUST block for unlink to complete */ - if (unlinked) - blocked = cfs_block_allsigs(); - - rc2 = LNetEQPoll(&eqh, 1, timeout_ms, &event, &which); - - if (unlinked) - cfs_restore_sigs(blocked); - - CDEBUG(D_NET, "poll %d(%d %d)%s\n", rc2, - (rc2 <= 0) ? -1 : event.type, - (rc2 <= 0) ? -1 : event.status, - (rc2 > 0 && event.unlinked) ? " unlinked" : ""); - - LASSERT (rc2 != -EOVERFLOW); /* can't miss anything */ - - if (rc2 <= 0 || event.status != 0) { - /* timeout or error */ - if (!replied && rc == 0) - rc = (rc2 < 0) ? rc2 : - (rc2 == 0) ? -ETIMEDOUT : - event.status; - - if (!unlinked) { - /* Ensure completion in finite time... */ - LNetMDUnlink(mdh); - /* No assertion (racing with network) */ - unlinked = 1; - timeout_ms = a_long_time; - } else if (rc2 == 0) { - /* timed out waiting for unlink */ - CWARN("ping %s: late network completion\n", - libcfs_id2str(id)); - } - - } else if (event.type == LNET_EVENT_REPLY) { - replied = 1; - rc = event.mlength; - } - - } while (rc2 <= 0 || !event.unlinked); - - if (!replied) { - if (rc >= 0) - CWARN("%s: Unexpected rc >= 0 but no reply!\n", - libcfs_id2str(id)); - rc = -EIO; - goto out_1; - } - - nob = rc; - LASSERT (nob >= 0 && nob <= infosz); - - rc = -EPROTO; /* if I can't parse... 
*/ - - if (nob < 8) { - /* can't check magic/version */ - CERROR("%s: ping info too short %d\n", - libcfs_id2str(id), nob); - goto out_1; - } - - if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC)) { - /* NB I might be swabbing garbage until I check below, but it - * doesn't matter */ - __swab32s(&info->pi_version); - __swab32s(&info->pi_pid); - __swab32s(&info->pi_nnids); - for (i = 0; i < info->pi_nnids && i < n_ids; i++) - __swab64s(&info->pi_nid[i]); - - } else if (info->pi_magic != LNET_PROTO_PING_MAGIC) { - CERROR("%s: Unexpected magic %08x\n", - libcfs_id2str(id), info->pi_magic); - goto out_1; - } - - if (info->pi_version != LNET_PROTO_PING_VERSION) { - CERROR("%s: Unexpected version 0x%x\n", - libcfs_id2str(id), info->pi_version); - goto out_1; - } - - if (nob < offsetof(lnet_ping_info_t, pi_nid[0])) { - CERROR("%s: Short reply %d(%d min)\n", libcfs_id2str(id), - nob, (int)offsetof(lnet_ping_info_t, pi_nid[0])); - goto out_1; - } - - if (info->pi_nnids < n_ids) - n_ids = info->pi_nnids; - - if (nob < offsetof(lnet_ping_info_t, pi_nid[n_ids])) { - CERROR("%s: Short reply %d(%d expected)\n", libcfs_id2str(id), - nob, (int)offsetof(lnet_ping_info_t, pi_nid[n_ids])); - goto out_1; - } - - rc = -EFAULT; /* If I SEGV... 
*/ - - for (i = 0; i < n_ids; i++) { - tmpid.pid = info->pi_pid; - tmpid.nid = info->pi_nid[i]; -#ifdef __KERNEL__ - if (copy_to_user(&ids[i], &tmpid, sizeof(tmpid))) - goto out_1; -#else - ids[i] = tmpid; -#endif - } - rc = info->pi_nnids; - - out_1: - rc2 = LNetEQFree(eqh); - if (rc2 != 0) - CERROR("rc2 %d\n", rc2); - LASSERT (rc2 == 0); - - out_0: - LIBCFS_FREE(info, infosz); - return rc; -} diff --git a/lnet/lnet/autoMakefile.am b/lnet/lnet/autoMakefile.am deleted file mode 100644 index 9ce40fe8434d88383299eebe94855be3a122e5c8..0000000000000000000000000000000000000000 --- a/lnet/lnet/autoMakefile.am +++ /dev/null @@ -1,46 +0,0 @@ -my_sources = api-errno.c api-ni.c config.c \ - lib-me.c lib-msg.c lib-eq.c \ - lib-md.c lib-move.c lo.c \ - router.c router_proc.c \ - acceptor.c peer.c - - -if LIBLUSTRE -noinst_LIBRARIES= liblnet.a -liblnet_a_SOURCES= $(my_sources) -liblnet_a_CPPFLAGS = $(LLCPPFLAGS) -liblnet_a_CFLAGS = $(LLCFLAGS) -endif - -if MODULES - -if LINUX -modulenet_DATA = lnet$(KMODEXT) -endif # LINUX - -if DARWIN -macos_PROGRAMS := lnet - -lnet_SOURCES := api-errno.c api-ni.c config.c -lnet_SOURCES += lib-me.c lib-msg.c lib-eq.c lib-md.c -lnet_SOURCES += lib-move.c module.c lo.c router.c router_proc.c -lnet_SOURCES += acceptor.c peer.c - -lnet_CFLAGS := $(EXTRA_KCFLAGS) -lnet_LDFLAGS := $(EXTRA_KLDFLAGS) -lnet_LDADD := $(EXTRA_KLIBS) - -plist_DATA := Info.plist - -install_data_hook := fix-kext-ownership - -endif # DARWIN - -endif # MODULES - -install-data-hook: $(install_data_hook) - -EXTRA_DIST := Info.plist - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ lnet -DIST_SOURCES = $(lnet-objs:%.o=%.c) diff --git a/lnet/lnet/config.c b/lnet/lnet/config.c deleted file mode 100644 index cd5e21103cc0d44147a437634def005619d979f6..0000000000000000000000000000000000000000 --- a/lnet/lnet/config.c +++ /dev/null @@ -1,1386 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2005 Cluster File 
Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -typedef struct { /* tmp struct for parsing routes */ - struct list_head ltb_list; /* stash on lists */ - int ltb_size; /* allocated size */ - char ltb_text[0]; /* text buffer */ -} lnet_text_buf_t; - -static int lnet_tbnob = 0; /* track text buf allocation */ -#define LNET_MAX_TEXTBUF_NOB (64<<10) /* bound allocation */ -#define LNET_SINGLE_TEXTBUF_NOB (4<<10) - -typedef struct { - struct list_head lre_list; /* stash in a list */ - int lre_min; /* min value */ - int lre_max; /* max value */ - int lre_stride; /* stride */ -} lnet_range_expr_t; - -static int lnet_re_alloc = 0; /* track expr allocation */ - -void -lnet_syntax(char *name, char *str, int offset, int width) -{ - static char dots[LNET_SINGLE_TEXTBUF_NOB]; - static char dashes[LNET_SINGLE_TEXTBUF_NOB]; - - memset(dots, '.', sizeof(dots)); - dots[sizeof(dots)-1] = 0; - memset(dashes, '-', sizeof(dashes)); - dashes[sizeof(dashes)-1] = 0; - - LCONSOLE_ERROR("Error parsing '%s=\"%s\"'\n", name, str); - LCONSOLE_ERROR("here...........%.*s..%.*s|%.*s|\n", - (int)strlen(name), dots, offset, dots, - (width < 1) ? 
0 : width - 1, dashes); -} - -int -lnet_issep (char c) -{ - switch (c) { - case '\n': - case '\r': - case ';': - return 1; - default: - return 0; - } -} - -int -lnet_iswhite (char c) -{ - switch (c) { - case ' ': - case '\t': - case '\n': - case '\r': - return 1; - default: - return 0; - } -} - -char * -lnet_trimwhite(char *str) -{ - char *end; - - while (lnet_iswhite(*str)) - str++; - - end = str + strlen(str); - while (end > str) { - if (!lnet_iswhite(end[-1])) - break; - end--; - } - - *end = 0; - return str; -} - -int -lnet_net_unique(__u32 net, struct list_head *nilist) -{ - struct list_head *tmp; - lnet_ni_t *ni; - - list_for_each (tmp, nilist) { - ni = list_entry(tmp, lnet_ni_t, ni_list); - - if (LNET_NIDNET(ni->ni_nid) == net) - return 0; - } - - return 1; -} - -lnet_ni_t * -lnet_new_ni(__u32 net, struct list_head *nilist) -{ - lnet_ni_t *ni; - - if (!lnet_net_unique(net, nilist)) { - LCONSOLE_ERROR("Duplicate network specified: %s\n", - libcfs_net2str(net)); - return NULL; - } - - LIBCFS_ALLOC(ni, sizeof(*ni)); - if (ni == NULL) { - CERROR("Out of memory creating network %s\n", - libcfs_net2str(net)); - return NULL; - } - - /* zero counters/flags, NULL pointers... 
*/ - memset(ni, 0, sizeof(*ni)); - - /* LND will fill in the address part of the NID */ - ni->ni_nid = LNET_MKNID(net, 0); - CFS_INIT_LIST_HEAD(&ni->ni_txq); - - list_add_tail(&ni->ni_list, nilist); - return ni; -} - -int -lnet_parse_networks(struct list_head *nilist, char *networks) -{ - int tokensize = strlen(networks) + 1; - char *tokens; - char *str; - lnet_ni_t *ni; - __u32 net; - int nnets = 0; - - if (strlen(networks) > LNET_SINGLE_TEXTBUF_NOB) { - /* _WAY_ conservative */ - LCONSOLE_ERROR("Can't parse networks: string too long\n"); - return -EINVAL; - } - - LIBCFS_ALLOC(tokens, tokensize); - if (tokens == NULL) { - CERROR("Can't allocate net tokens\n"); - return -ENOMEM; - } - - the_lnet.ln_network_tokens = tokens; - the_lnet.ln_network_tokens_nob = tokensize; - memcpy (tokens, networks, tokensize); - str = tokens; - - /* Add in the loopback network */ - ni = lnet_new_ni(LNET_MKNET(LOLND, 0), nilist); - if (ni == NULL) - goto failed; - - while (str != NULL && *str != 0) { - char *comma = strchr(str, ','); - char *bracket = strchr(str, '('); - int niface; - char *iface; - - /* NB we don't check interface conflicts here; it's the LNDs - * responsibility (if it cares at all) */ - - if (bracket == NULL || - (comma != NULL && comma < bracket)) { - - /* no interface list specified */ - - if (comma != NULL) - *comma++ = 0; - net = libcfs_str2net(lnet_trimwhite(str)); - - if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("networks", networks, - str - tokens, strlen(str)); - LCONSOLE_ERROR("Unrecognised network type\n"); - goto failed; - } - - if (LNET_NETTYP(net) != LOLND && /* loopback is implicit */ - lnet_new_ni(net, nilist) == NULL) - goto failed; - - str = comma; - continue; - } - - *bracket = 0; - net = libcfs_str2net(lnet_trimwhite(str)); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("networks", networks, - str - tokens, strlen(str)); - goto failed; - } - - if (nnets > 0 && - the_lnet.ln_ptlcompat > 0) { - LCONSOLE_ERROR("Only 1 network 
supported when " - "'portals_compatible' is set\n"); - goto failed; - } - - nnets++; - ni = lnet_new_ni(net, nilist); - if (ni == NULL) - goto failed; - - niface = 0; - iface = bracket + 1; - - bracket = strchr(iface, ')'); - if (bracket == NULL) { - lnet_syntax("networks", networks, - iface - tokens, strlen(iface)); - goto failed; - } - - *bracket = 0; - do { - comma = strchr(iface, ','); - if (comma != NULL) - *comma++ = 0; - - iface = lnet_trimwhite(iface); - if (*iface == 0) { - lnet_syntax("networks", networks, - iface - tokens, strlen(iface)); - goto failed; - } - - if (niface == LNET_MAX_INTERFACES) { - LCONSOLE_ERROR("Too many interfaces for net %s\n", - libcfs_net2str(net)); - goto failed; - } - - ni->ni_interfaces[niface++] = iface; - iface = comma; - } while (iface != NULL); - - str = bracket + 1; - comma = strchr(bracket + 1, ','); - if (comma != NULL) { - *comma = 0; - str = lnet_trimwhite(str); - if (*str != 0) { - lnet_syntax("networks", networks, - str - tokens, strlen(str)); - goto failed; - } - str = comma + 1; - continue; - } - - str = lnet_trimwhite(str); - if (*str != 0) { - lnet_syntax("networks", networks, - str - tokens, strlen(str)); - goto failed; - } - } - - LASSERT (!list_empty(nilist)); - return 0; - - failed: - while (!list_empty(nilist)) { - ni = list_entry(nilist->next, lnet_ni_t, ni_list); - - list_del(&ni->ni_list); - LIBCFS_FREE(ni, sizeof(*ni)); - } - LIBCFS_FREE(tokens, tokensize); - the_lnet.ln_network_tokens = NULL; - - return -EINVAL; -} - -lnet_text_buf_t * -lnet_new_text_buf (int str_len) -{ - lnet_text_buf_t *ltb; - int nob; - - /* NB allocate space for the terminating 0 */ - nob = offsetof(lnet_text_buf_t, ltb_text[str_len + 1]); - if (nob > LNET_SINGLE_TEXTBUF_NOB) { - /* _way_ conservative for "route net gateway..." 
*/ - CERROR("text buffer too big\n"); - return NULL; - } - - if (lnet_tbnob + nob > LNET_MAX_TEXTBUF_NOB) { - CERROR("Too many text buffers\n"); - return NULL; - } - - LIBCFS_ALLOC(ltb, nob); - if (ltb == NULL) - return NULL; - - ltb->ltb_size = nob; - ltb->ltb_text[0] = 0; - lnet_tbnob += nob; - return ltb; -} - -void -lnet_free_text_buf (lnet_text_buf_t *ltb) -{ - lnet_tbnob -= ltb->ltb_size; - LIBCFS_FREE(ltb, ltb->ltb_size); -} - -void -lnet_free_text_bufs(struct list_head *tbs) -{ - lnet_text_buf_t *ltb; - - while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list); - - list_del(<b->ltb_list); - lnet_free_text_buf(ltb); - } -} - -void -lnet_print_text_bufs(struct list_head *tbs) -{ - struct list_head *tmp; - lnet_text_buf_t *ltb; - - list_for_each (tmp, tbs) { - ltb = list_entry(tmp, lnet_text_buf_t, ltb_list); - - CDEBUG(D_WARNING, "%s\n", ltb->ltb_text); - } - - CDEBUG(D_WARNING, "%d allocated\n", lnet_tbnob); -} - -int -lnet_str2tbs_sep (struct list_head *tbs, char *str) -{ - struct list_head pending; - char *sep; - int nob; - int i; - lnet_text_buf_t *ltb; - - INIT_LIST_HEAD(&pending); - - /* Split 'str' into separate commands */ - for (;;) { - /* skip leading whitespace */ - while (lnet_iswhite(*str)) - str++; - - /* scan for separator or comment */ - for (sep = str; *sep != 0; sep++) - if (lnet_issep(*sep) || *sep == '#') - break; - - nob = sep - str; - if (nob > 0) { - ltb = lnet_new_text_buf(nob); - if (ltb == NULL) { - lnet_free_text_bufs(&pending); - return -1; - } - - for (i = 0; i < nob; i++) - if (lnet_iswhite(str[i])) - ltb->ltb_text[i] = ' '; - else - ltb->ltb_text[i] = str[i]; - - ltb->ltb_text[nob] = 0; - - list_add_tail(<b->ltb_list, &pending); - } - - if (*sep == '#') { - /* scan for separator */ - do { - sep++; - } while (*sep != 0 && !lnet_issep(*sep)); - } - - if (*sep == 0) - break; - - str = sep + 1; - } - - list_splice(&pending, tbs->prev); - return 0; -} - -int -lnet_expand1tb (struct list_head *list, - char 
*str, char *sep1, char *sep2, - char *item, int itemlen) -{ - int len1 = sep1 - str; - int len2 = strlen(sep2 + 1); - lnet_text_buf_t *ltb; - - LASSERT (*sep1 == '['); - LASSERT (*sep2 == ']'); - - ltb = lnet_new_text_buf(len1 + itemlen + len2); - if (ltb == NULL) - return -ENOMEM; - - memcpy(ltb->ltb_text, str, len1); - memcpy(<b->ltb_text[len1], item, itemlen); - memcpy(<b->ltb_text[len1+itemlen], sep2 + 1, len2); - ltb->ltb_text[len1 + itemlen + len2] = 0; - - list_add_tail(<b->ltb_list, list); - return 0; -} - -int -lnet_str2tbs_expand (struct list_head *tbs, char *str) -{ - char num[16]; - struct list_head pending; - char *sep; - char *sep2; - char *parsed; - char *enditem; - int lo; - int hi; - int stride; - int i; - int nob; - int scanned; - - INIT_LIST_HEAD(&pending); - - sep = strchr(str, '['); - if (sep == NULL) /* nothing to expand */ - return 0; - - sep2 = strchr(sep, ']'); - if (sep2 == NULL) - goto failed; - - for (parsed = sep; parsed < sep2; parsed = enditem) { - - enditem = ++parsed; - while (enditem < sep2 && *enditem != ',') - enditem++; - - if (enditem == parsed) /* no empty items */ - goto failed; - - if (sscanf(parsed, "%d-%d/%d%n", &lo, &hi, &stride, &scanned) < 3) { - - if (sscanf(parsed, "%d-%d%n", &lo, &hi, &scanned) < 2) { - - /* simple string enumeration */ - if (lnet_expand1tb(&pending, str, sep, sep2, - parsed, enditem - parsed) != 0) - goto failed; - - continue; - } - - stride = 1; - } - - /* range expansion */ - - if (enditem != parsed + scanned) /* no trailing junk */ - goto failed; - - if (hi < 0 || lo < 0 || stride < 0 || hi < lo || - (hi - lo) % stride != 0) - goto failed; - - for (i = lo; i <= hi; i += stride) { - - snprintf(num, sizeof(num), "%d", i); - nob = strlen(num); - if (nob + 1 == sizeof(num)) - goto failed; - - if (lnet_expand1tb(&pending, str, sep, sep2, - num, nob) != 0) - goto failed; - } - } - - list_splice(&pending, tbs->prev); - return 1; - - failed: - lnet_free_text_bufs(&pending); - return -1; -} - -int 
-lnet_parse_hops (char *str, unsigned int *hops) -{ - int len = strlen(str); - int nob = len; - - return (sscanf(str, "%u%n", hops, &nob) >= 1 && - nob == len && - *hops > 0 && *hops < 256); -} - - -int -lnet_parse_route (char *str, int *im_a_router) -{ - /* static scratch buffer OK (single threaded) */ - static char cmd[LNET_SINGLE_TEXTBUF_NOB]; - - struct list_head nets; - struct list_head gateways; - struct list_head *tmp1; - struct list_head *tmp2; - __u32 net; - lnet_nid_t nid; - lnet_text_buf_t *ltb; - int rc; - char *sep; - char *token = str; - int ntokens = 0; - int myrc = -1; - unsigned int hops; - int got_hops = 0; - - CFS_INIT_LIST_HEAD(&gateways); - CFS_INIT_LIST_HEAD(&nets); - - /* save a copy of the string for error messages */ - strncpy(cmd, str, sizeof(cmd) - 1); - cmd[sizeof(cmd) - 1] = 0; - - sep = str; - for (;;) { - /* scan for token start */ - while (lnet_iswhite(*sep)) - sep++; - if (*sep == 0) { - if (ntokens < (got_hops ? 3 : 2)) - goto token_error; - break; - } - - ntokens++; - token = sep++; - - /* scan for token end */ - while (*sep != 0 && !lnet_iswhite(*sep)) - sep++; - if (*sep != 0) - *sep++ = 0; - - if (ntokens == 1) { - tmp2 = &nets; /* expanding nets */ - } else if (ntokens == 2 && - lnet_parse_hops(token, &hops)) { - got_hops = 1; /* got a hop count */ - continue; - } else { - tmp2 = &gateways; /* expanding gateways */ - } - - ltb = lnet_new_text_buf(strlen(token)); - if (ltb == NULL) - goto out; - - strcpy(ltb->ltb_text, token); - tmp1 = <b->ltb_list; - list_add_tail(tmp1, tmp2); - - while (tmp1 != tmp2) { - ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list); - - rc = lnet_str2tbs_expand(tmp1->next, ltb->ltb_text); - if (rc < 0) - goto token_error; - - tmp1 = tmp1->next; - - if (rc > 0) { /* expanded! 
*/ - list_del(<b->ltb_list); - lnet_free_text_buf(ltb); - continue; - } - - if (ntokens == 1) { - net = libcfs_str2net(ltb->ltb_text); - if (net == LNET_NIDNET(LNET_NID_ANY) || - LNET_NETTYP(net) == LOLND) - goto token_error; - } else { - nid = libcfs_str2nid(ltb->ltb_text); - if (nid == LNET_NID_ANY || - LNET_NETTYP(LNET_NIDNET(nid)) == LOLND) - goto token_error; - } - } - } - - if (!got_hops) - hops = 1; - - LASSERT (!list_empty(&nets)); - LASSERT (!list_empty(&gateways)); - - list_for_each (tmp1, &nets) { - ltb = list_entry(tmp1, lnet_text_buf_t, ltb_list); - net = libcfs_str2net(ltb->ltb_text); - LASSERT (net != LNET_NIDNET(LNET_NID_ANY)); - - list_for_each (tmp2, &gateways) { - ltb = list_entry(tmp2, lnet_text_buf_t, ltb_list); - nid = libcfs_str2nid(ltb->ltb_text); - LASSERT (nid != LNET_NID_ANY); - - if (lnet_islocalnid(nid)) { - *im_a_router = 1; - continue; - } - - rc = lnet_add_route (net, hops, nid); - if (rc != 0) { - CERROR("Can't create route " - "to %s via %s\n", - libcfs_net2str(net), - libcfs_nid2str(nid)); - goto out; - } - } - } - - myrc = 0; - goto out; - - token_error: - lnet_syntax("routes", cmd, token - str, strlen(token)); - out: - lnet_free_text_bufs(&nets); - lnet_free_text_bufs(&gateways); - return myrc; -} - -int -lnet_parse_route_tbs(struct list_head *tbs, int *im_a_router) -{ - lnet_text_buf_t *ltb; - - while (!list_empty(tbs)) { - ltb = list_entry(tbs->next, lnet_text_buf_t, ltb_list); - - if (lnet_parse_route(ltb->ltb_text, im_a_router) < 0) { - lnet_free_text_bufs(tbs); - return -EINVAL; - } - - list_del(<b->ltb_list); - lnet_free_text_buf(ltb); - } - - return 0; -} - -int -lnet_parse_routes (char *routes, int *im_a_router) -{ - struct list_head tbs; - int rc = 0; - - *im_a_router = 0; - - if (the_lnet.ln_ptlcompat > 0 && - routes[0] != 0) { - /* Can't route when running in compatibility mode */ - LCONSOLE_ERROR("Route tables are not supported when " - "'portals_compatible' is set\n"); - return -EINVAL; - } - - 
CFS_INIT_LIST_HEAD(&tbs); - - if (lnet_str2tbs_sep(&tbs, routes) < 0) { - CERROR("Error parsing routes\n"); - rc = -EINVAL; - } else { - rc = lnet_parse_route_tbs(&tbs, im_a_router); - } - - LASSERT (lnet_tbnob == 0); - return rc; -} - -void -lnet_print_range_exprs(struct list_head *exprs) -{ - struct list_head *e; - lnet_range_expr_t *lre; - - list_for_each(e, exprs) { - lre = list_entry(exprs->next, lnet_range_expr_t, lre_list); - - CDEBUG(D_WARNING, "%d-%d/%d\n", - lre->lre_min, lre->lre_max, lre->lre_stride); - } - - CDEBUG(D_WARNING, "%d allocated\n", lnet_re_alloc); -} - -int -lnet_new_range_expr(struct list_head *exprs, int min, int max, int stride) -{ - lnet_range_expr_t *lre; - - CDEBUG(D_NET, "%d-%d/%d\n", min, max, stride); - - if (min < 0 || min > 255 || min > max || stride < 0) - return -EINVAL; - - LIBCFS_ALLOC(lre, sizeof(*lre)); - if (lre == NULL) - return -ENOMEM; - - lnet_re_alloc++; - - lre->lre_min = min; - lre->lre_max = max; - lre->lre_stride = stride; - - list_add(&lre->lre_list, exprs); - return 0; -} - -void -lnet_destroy_range_exprs(struct list_head *exprs) -{ - lnet_range_expr_t *lre; - - while (!list_empty(exprs)) { - lre = list_entry(exprs->next, lnet_range_expr_t, lre_list); - - list_del(&lre->lre_list); - LIBCFS_FREE(lre, sizeof(*lre)); - lnet_re_alloc--; - } -} - -int -lnet_parse_range_expr(struct list_head *exprs, char *str) -{ - int nob = strlen(str); - char *sep; - int n; - int x; - int y; - int z; - int rc; - - if (nob == 0) - return -EINVAL; - - if (!strcmp(str, "*")) /* match all */ - return lnet_new_range_expr(exprs, 0, 255, 1); - - n = nob; - if (sscanf(str, "%u%n", &x, &n) >= 1 && n == nob) { - /* simple number */ - return lnet_new_range_expr(exprs, x, x, 1); - } - - /* Has to be an expansion */ - if (!(str[0] == '[' && nob > 2 && str[nob-1] == ']')) - return -EINVAL; - - nob -= 2; - str++; - str[nob] = 0; - - do { - /* Comma separated list of expressions... 
*/ - sep = strchr(str, ','); - if (sep != NULL) - *sep++ = 0; - - nob = strlen(str); - n = nob; - if (sscanf(str, "%u%n", &x, &n) >= 1 && n == nob) { - /* simple number */ - rc = lnet_new_range_expr(exprs, x, x, 1); - if (rc != 0) - return rc; - - continue; - } - - n = nob; - if (sscanf(str, "%u-%u%n", &x, &y, &n) >= 2 && n == nob) { - /* simple range */ - rc = lnet_new_range_expr(exprs, x, y, 1); - if (rc != 0) - return rc; - continue; - } - - n = nob; - if (sscanf(str, "%u-%u/%u%n", &x, &y, &z, &n) >= 3 && n == nob) { - /* strided range */ - rc = lnet_new_range_expr(exprs, x, y, z); - if (rc != 0) - return rc; - continue; - } - - return -EINVAL; - - } while ((str = sep) != NULL); - - return 0; -} - -int -lnet_match_network_token(char *token, __u32 *ipaddrs, int nip) -{ - struct list_head exprs[4]; - struct list_head *e; - lnet_range_expr_t *re; - char *str; - int i; - int j; - __u32 ip; - int n; - int match; - int rc; - - for (i = 0; i < 4; i++) - CFS_INIT_LIST_HEAD(&exprs[i]); - - for (i = 0; i < 4; i++) { - str = token; - if (i != 3) { - token = strchr(token, '.'); - if (token == NULL) { - rc = -EINVAL; - goto out; - } - *token++ = 0; - } - - rc = lnet_parse_range_expr(&exprs[i], str); - if (rc != 0) { - LASSERT (rc < 0); - goto out; - } - } - - for (match = i = 0; !match && i < nip; i++) { - ip = ipaddrs[i]; - - for (match = 1, j = 0; match && j < 4; j++) { - n = (ip >> (8 * (3 - j))) & 0xff; - match = 0; - - list_for_each(e, &exprs[j]) { - re = list_entry(e, lnet_range_expr_t, lre_list); - - if (re->lre_min <= n && - re->lre_max >= n && - (n - re->lre_min) % re->lre_stride == 0) { - match = 1; - break; - } - } - } - } - - rc = match ? 
1 : 0; - - out: - for (i = 0; i < 4; i++) - lnet_destroy_range_exprs(&exprs[i]); - LASSERT (lnet_re_alloc == 0); - - return rc; -} - -int -lnet_match_network_tokens(char *net_entry, __u32 *ipaddrs, int nip) -{ - static char tokens[LNET_SINGLE_TEXTBUF_NOB]; - - int matched = 0; - int ntokens = 0; - int len; - char *net = NULL; - char *sep; - char *token; - int rc; - - LASSERT (strlen(net_entry) < sizeof(tokens)); - - /* work on a copy of the string */ - strcpy(tokens, net_entry); - sep = tokens; - for (;;) { - /* scan for token start */ - while (lnet_iswhite(*sep)) - sep++; - if (*sep == 0) - break; - - token = sep++; - - /* scan for token end */ - while (*sep != 0 && !lnet_iswhite(*sep)) - sep++; - if (*sep != 0) - *sep++ = 0; - - if (ntokens++ == 0) { - net = token; - continue; - } - - len = strlen(token); - - rc = lnet_match_network_token(token, ipaddrs, nip); - if (rc < 0) { - lnet_syntax("ip2nets", net_entry, - token - tokens, len); - return rc; - } - - matched |= (rc != 0); - } - - if (!matched) - return 0; - - strcpy(net_entry, net); /* replace with matched net */ - return 1; -} - -__u32 -lnet_netspec2net(char *netspec) -{ - char *bracket = strchr(netspec, '('); - __u32 net; - - if (bracket != NULL) - *bracket = 0; - - net = libcfs_str2net(netspec); - - if (bracket != NULL) - *bracket = '('; - - return net; -} - -int -lnet_splitnets(char *source, struct list_head *nets) -{ - int offset = 0; - int offset2; - int len; - lnet_text_buf_t *tb; - lnet_text_buf_t *tb2; - struct list_head *t; - char *sep; - char *bracket; - __u32 net; - - LASSERT (!list_empty(nets)); - LASSERT (nets->next == nets->prev); /* single entry */ - - tb = list_entry(nets->next, lnet_text_buf_t, ltb_list); - - for (;;) { - sep = strchr(tb->ltb_text, ','); - bracket = strchr(tb->ltb_text, '('); - - if (sep != NULL && - bracket != NULL && - bracket < sep) { - /* netspec lists interfaces... 
*/ - - offset2 = offset + (bracket - tb->ltb_text); - len = strlen(bracket); - - bracket = strchr(bracket + 1, ')'); - - if (bracket == NULL || - !(bracket[1] == ',' || bracket[1] == 0)) { - lnet_syntax("ip2nets", source, offset2, len); - return -EINVAL; - } - - sep = (bracket[1] == 0) ? NULL : bracket + 1; - } - - if (sep != NULL) - *sep++ = 0; - - net = lnet_netspec2net(tb->ltb_text); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - lnet_syntax("ip2nets", source, offset, - strlen(tb->ltb_text)); - return -EINVAL; - } - - list_for_each(t, nets) { - tb2 = list_entry(t, lnet_text_buf_t, ltb_list); - - if (tb2 == tb) - continue; - - if (net == lnet_netspec2net(tb2->ltb_text)) { - /* duplicate network */ - lnet_syntax("ip2nets", source, offset, - strlen(tb->ltb_text)); - return -EINVAL; - } - } - - if (sep == NULL) - return 0; - - offset += sep - tb->ltb_text; - tb2 = lnet_new_text_buf(strlen(sep)); - if (tb2 == NULL) - return -ENOMEM; - - strcpy(tb2->ltb_text, sep); - list_add_tail(&tb2->ltb_list, nets); - - tb = tb2; - } -} - -int -lnet_match_networks (char **networksp, char *ip2nets, __u32 *ipaddrs, int nip) -{ - static char networks[LNET_SINGLE_TEXTBUF_NOB]; - static char source[LNET_SINGLE_TEXTBUF_NOB]; - - struct list_head raw_entries; - struct list_head matched_nets; - struct list_head current_nets; - struct list_head *t; - struct list_head *t2; - lnet_text_buf_t *tb; - lnet_text_buf_t *tb2; - __u32 net1; - __u32 net2; - int len; - int count; - int dup; - int rc; - - CFS_INIT_LIST_HEAD(&raw_entries); - if (lnet_str2tbs_sep(&raw_entries, ip2nets) < 0) { - CERROR("Error parsing ip2nets\n"); - LASSERT (lnet_tbnob == 0); - return -EINVAL; - } - - CFS_INIT_LIST_HEAD(&matched_nets); - CFS_INIT_LIST_HEAD(¤t_nets); - networks[0] = 0; - count = 0; - len = 0; - rc = 0; - - while (!list_empty(&raw_entries)) { - tb = list_entry(raw_entries.next, lnet_text_buf_t, ltb_list); - - strncpy(source, tb->ltb_text, sizeof(source)-1); - source[sizeof(source)-1] = 0; - - /* replace 
ltb_text with the network(s) add on match */ - rc = lnet_match_network_tokens(tb->ltb_text, ipaddrs, nip); - if (rc < 0) - break; - - list_del(&tb->ltb_list); - - if (rc == 0) { /* no match */ - lnet_free_text_buf(tb); - continue; - } - - /* split into separate networks */ - CFS_INIT_LIST_HEAD(¤t_nets); - list_add(&tb->ltb_list, ¤t_nets); - rc = lnet_splitnets(source, ¤t_nets); - if (rc < 0) - break; - - dup = 0; - list_for_each (t, ¤t_nets) { - tb = list_entry(t, lnet_text_buf_t, ltb_list); - net1 = lnet_netspec2net(tb->ltb_text); - LASSERT (net1 != LNET_NIDNET(LNET_NID_ANY)); - - list_for_each(t2, &matched_nets) { - tb2 = list_entry(t2, lnet_text_buf_t, ltb_list); - net2 = lnet_netspec2net(tb2->ltb_text); - LASSERT (net2 != LNET_NIDNET(LNET_NID_ANY)); - - if (net1 == net2) { - dup = 1; - break; - } - } - - if (dup) - break; - } - - if (dup) { - lnet_free_text_bufs(¤t_nets); - continue; - } - - list_for_each_safe(t, t2, ¤t_nets) { - tb = list_entry(t, lnet_text_buf_t, ltb_list); - - list_del(&tb->ltb_list); - list_add_tail(&tb->ltb_list, &matched_nets); - - len += snprintf(networks + len, sizeof(networks) - len, - "%s%s", (len == 0) ? 
"" : ",", - tb->ltb_text); - - if (len >= sizeof(networks)) { - CERROR("Too many matched networks\n"); - rc = -E2BIG; - goto out; - } - } - - count++; - } - - out: - lnet_free_text_bufs(&raw_entries); - lnet_free_text_bufs(&matched_nets); - lnet_free_text_bufs(¤t_nets); - LASSERT (lnet_tbnob == 0); - - if (rc < 0) - return rc; - - *networksp = networks; - return count; -} - -#ifdef __KERNEL__ -void -lnet_ipaddr_free_enumeration(__u32 *ipaddrs, int nip) -{ - LIBCFS_FREE(ipaddrs, nip * sizeof(*ipaddrs)); -} - -int -lnet_ipaddr_enumerate (__u32 **ipaddrsp) -{ - int up; - __u32 netmask; - __u32 *ipaddrs; - __u32 *ipaddrs2; - int nip; - char **ifnames; - int nif = libcfs_ipif_enumerate(&ifnames); - int i; - int rc; - - if (nif <= 0) - return nif; - - LIBCFS_ALLOC(ipaddrs, nif * sizeof(*ipaddrs)); - if (ipaddrs == NULL) { - CERROR("Can't allocate ipaddrs[%d]\n", nif); - libcfs_ipif_free_enumeration(ifnames, nif); - return -ENOMEM; - } - - for (i = nip = 0; i < nif; i++) { - if (!strcmp(ifnames[i], "lo")) - continue; - - rc = libcfs_ipif_query(ifnames[i], &up, - &ipaddrs[nip], &netmask); - if (rc != 0) { - CWARN("Can't query interface %s: %d\n", - ifnames[i], rc); - continue; - } - - if (!up) { - CWARN("Ignoring interface %s: it's down\n", - ifnames[i]); - continue; - } - - nip++; - } - - libcfs_ipif_free_enumeration(ifnames, nif); - - if (nip == nif) { - *ipaddrsp = ipaddrs; - } else { - if (nip > 0) { - LIBCFS_ALLOC(ipaddrs2, nip * sizeof(*ipaddrs2)); - if (ipaddrs2 == NULL) { - CERROR("Can't allocate ipaddrs[%d]\n", nip); - nip = -ENOMEM; - } else { - memcpy(ipaddrs2, ipaddrs, - nip * sizeof(*ipaddrs)); - *ipaddrsp = ipaddrs2; - rc = nip; - } - } - lnet_ipaddr_free_enumeration(ipaddrs, nif); - } - return nip; -} - -int -lnet_parse_ip2nets (char **networksp, char *ip2nets) -{ - __u32 *ipaddrs; - int nip = lnet_ipaddr_enumerate(&ipaddrs); - int rc; - - if (nip < 0) { - LCONSOLE_ERROR("Error %d enumerating local IP interfaces " - "for ip2nets to match\n", nip); - return 
nip; - } - - if (nip == 0) { - LCONSOLE_ERROR("No local IP interfaces " - "for ip2nets to match\n"); - return -ENOENT; - } - - rc = lnet_match_networks(networksp, ip2nets, ipaddrs, nip); - lnet_ipaddr_free_enumeration(ipaddrs, nip); - - if (rc < 0) { - LCONSOLE_ERROR("Error %d parsing ip2nets\n", rc); - return rc; - } - - if (rc == 0) { - LCONSOLE_ERROR("ip2nets does not match " - "any local IP interfaces\n"); - return -ENOENT; - } - - return 0; -} - -int -lnet_set_ip_niaddr (lnet_ni_t *ni) -{ - __u32 net = LNET_NIDNET(ni->ni_nid); - char **names; - int n; - __u32 ip; - __u32 netmask; - int up; - int i; - int rc; - - /* Convenience for LNDs that use the IP address of a local interface as - * the local address part of their NID */ - - if (ni->ni_interfaces[0] != NULL) { - - CLASSERT (LNET_MAX_INTERFACES > 1); - - if (ni->ni_interfaces[1] != NULL) { - CERROR("Net %s doesn't support multiple interfaces\n", - libcfs_net2str(net)); - return -EPERM; - } - - rc = libcfs_ipif_query(ni->ni_interfaces[0], - &up, &ip, &netmask); - if (rc != 0) { - CERROR("Net %s can't query interface %s: %d\n", - libcfs_net2str(net), ni->ni_interfaces[0], rc); - return -EPERM; - } - - if (!up) { - CERROR("Net %s can't use interface %s: it's down\n", - libcfs_net2str(net), ni->ni_interfaces[0]); - return -ENETDOWN; - } - - ni->ni_nid = LNET_MKNID(net, ip); - return 0; - } - - n = libcfs_ipif_enumerate(&names); - if (n <= 0) { - CERROR("Net %s can't enumerate interfaces: %d\n", - libcfs_net2str(net), n); - return 0; - } - - for (i = 0; i < n; i++) { - if (!strcmp(names[i], "lo")) /* skip the loopback IF */ - continue; - - rc = libcfs_ipif_query(names[i], &up, &ip, &netmask); - - if (rc != 0) { - CWARN("Net %s can't query interface %s: %d\n", - libcfs_net2str(net), names[i], rc); - continue; - } - - if (!up) { - CWARN("Net %s ignoring interface %s (down)\n", - libcfs_net2str(net), names[i]); - continue; - } - - libcfs_ipif_free_enumeration(names, n); - ni->ni_nid = LNET_MKNID(net, ip); - return 
0; - } - - CERROR("Net %s can't find any interfaces\n", libcfs_net2str(net)); - libcfs_ipif_free_enumeration(names, n); - return -ENOENT; -} -EXPORT_SYMBOL(lnet_set_ip_niaddr); - -#endif diff --git a/lnet/lnet/lib-eq.c b/lnet/lnet/lib-eq.c deleted file mode 100644 index 5bae602fa88efd61097f6564ae07a61faf13efb1..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-eq.c +++ /dev/null @@ -1,318 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-eq.c - * Library level Event queue management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -int -LNetEQAlloc(unsigned int count, lnet_eq_handler_t callback, - lnet_handle_eq_t *handle) -{ - lnet_eq_t *eq; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - /* We need count to be a power of 2 so that when eq_{enq,deq}_seq - * overflow, they don't skip entries, so the queue has the same - * apparant capacity at all times */ - - if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */ - do { /* knock off all but the top bit... 
*/ - count &= ~LOWEST_BIT_SET (count); - } while (count != LOWEST_BIT_SET(count)); - - count <<= 1; /* ...and round up */ - } - - if (count == 0) /* catch bad parameter / overflow on roundup */ - return (-EINVAL); - - eq = lnet_eq_alloc(); - if (eq == NULL) - return (-ENOMEM); - - LIBCFS_ALLOC(eq->eq_events, count * sizeof(lnet_event_t)); - if (eq->eq_events == NULL) { - LNET_LOCK(); - lnet_eq_free (eq); - LNET_UNLOCK(); - - return -ENOMEM; - } - - /* NB this resets all event sequence numbers to 0, to be earlier - * than eq_deq_seq */ - memset(eq->eq_events, 0, count * sizeof(lnet_event_t)); - - eq->eq_deq_seq = 1; - eq->eq_enq_seq = 1; - eq->eq_size = count; - eq->eq_refcount = 0; - eq->eq_callback = callback; - - LNET_LOCK(); - - lnet_initialise_handle (&eq->eq_lh, LNET_COOKIE_TYPE_EQ); - list_add (&eq->eq_list, &the_lnet.ln_active_eqs); - - LNET_UNLOCK(); - - lnet_eq2handle(handle, eq); - return (0); -} - -int -LNetEQFree(lnet_handle_eq_t eqh) -{ - lnet_eq_t *eq; - int size; - lnet_event_t *events; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - eq = lnet_handle2eq(&eqh); - if (eq == NULL) { - LNET_UNLOCK(); - return (-ENOENT); - } - - if (eq->eq_refcount != 0) { - LNET_UNLOCK(); - return (-EBUSY); - } - - /* stash for free after lock dropped */ - events = eq->eq_events; - size = eq->eq_size; - - lnet_invalidate_handle (&eq->eq_lh); - list_del (&eq->eq_list); - lnet_eq_free (eq); - - LNET_UNLOCK(); - - LIBCFS_FREE(events, size * sizeof (lnet_event_t)); - - return 0; -} - -int -lib_get_event (lnet_eq_t *eq, lnet_event_t *ev) -{ - int new_index = eq->eq_deq_seq & (eq->eq_size - 1); - lnet_event_t *new_event = &eq->eq_events[new_index]; - int rc; - ENTRY; - - CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n", - new_event, eq->eq_deq_seq, eq->eq_size); - - if (LNET_SEQ_GT (eq->eq_deq_seq, new_event->sequence)) { - RETURN(0); - } - - /* We've got a new event... 
*/ - *ev = *new_event; - - /* ...but did it overwrite an event we've not seen yet? */ - if (eq->eq_deq_seq == new_event->sequence) { - rc = 1; - } else { - /* don't complain with CERROR: some EQs are sized small - * anyway; if it's important, the caller should complain */ - CDEBUG(D_NET, "Event Queue Overflow: eq seq %lu ev seq %lu\n", - eq->eq_deq_seq, new_event->sequence); - rc = -EOVERFLOW; - } - - eq->eq_deq_seq = new_event->sequence + 1; - RETURN(rc); -} - - -int -LNetEQGet (lnet_handle_eq_t eventq, lnet_event_t *event) -{ - int which; - - return LNetEQPoll(&eventq, 1, 0, - event, &which); -} - -int -LNetEQWait (lnet_handle_eq_t eventq, lnet_event_t *event) -{ - int which; - - return LNetEQPoll(&eventq, 1, LNET_TIME_FOREVER, - event, &which); -} - -int -LNetEQPoll (lnet_handle_eq_t *eventqs, int neq, int timeout_ms, - lnet_event_t *event, int *which) -{ - int i; - int rc; -#ifdef __KERNEL__ - cfs_waitlink_t wl; - cfs_time_t now; -#else - struct timeval then; - struct timeval now; -# ifdef HAVE_LIBPTHREAD - struct timespec ts; -# endif - lnet_ni_t *eqwaitni = the_lnet.ln_eqwaitni; -#endif - ENTRY; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (neq < 1) - RETURN(-ENOENT); - - LNET_LOCK(); - - for (;;) { - for (i = 0; i < neq; i++) { - lnet_eq_t *eq = lnet_handle2eq(&eventqs[i]); - - if (eq == NULL) { - LNET_UNLOCK(); - RETURN(-ENOENT); - } - - rc = lib_get_event (eq, event); - if (rc != 0) { - LNET_UNLOCK(); - *which = i; - RETURN(rc); - } - } - -#ifdef __KERNEL__ - if (timeout_ms == 0) { - LNET_UNLOCK (); - RETURN (0); - } - - cfs_waitlink_init(&wl); - set_current_state(TASK_INTERRUPTIBLE); - cfs_waitq_add(&the_lnet.ln_waitq, &wl); - - LNET_UNLOCK(); - - if (timeout_ms < 0) { - cfs_waitq_wait (&wl, CFS_TASK_INTERRUPTIBLE); - } else { - struct timeval tv; - - now = cfs_time_current(); - cfs_waitq_timedwait(&wl, CFS_TASK_INTERRUPTIBLE, - cfs_time_seconds(timeout_ms)/1000); - cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), - 
&tv); - timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000; - if (timeout_ms < 0) - timeout_ms = 0; - } - - LNET_LOCK(); - cfs_waitq_del(&the_lnet.ln_waitq, &wl); -#else - if (eqwaitni != NULL) { - /* I have a single NI that I have to call into, to get - * events queued, or to block. */ - lnet_ni_addref_locked(eqwaitni); - LNET_UNLOCK(); - - if (timeout_ms <= 0) { - (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms); - } else { - gettimeofday(&then, NULL); - - (eqwaitni->ni_lnd->lnd_wait)(eqwaitni, timeout_ms); - - gettimeofday(&now, NULL); - timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 + - (now.tv_usec - then.tv_usec) / 1000; - if (timeout_ms < 0) - timeout_ms = 0; - } - - LNET_LOCK(); - lnet_ni_decref_locked(eqwaitni); - - /* don't call into eqwaitni again if timeout has - * expired */ - if (timeout_ms == 0) - eqwaitni = NULL; - - continue; /* go back and check for events */ - } - - if (timeout_ms == 0) { - LNET_UNLOCK(); - RETURN (0); - } - -# ifndef HAVE_LIBPTHREAD - /* If I'm single-threaded, LNET fails at startup if it can't - * set the_lnet.ln_eqwaitni correctly. 
*/ - LBUG(); -# else - if (timeout_ms < 0) { - pthread_cond_wait(&the_lnet.ln_cond, - &the_lnet.ln_lock); - } else { - gettimeofday(&then, NULL); - - ts.tv_sec = then.tv_sec + timeout_ms/1000; - ts.tv_nsec = then.tv_usec * 1000 + - (timeout_ms%1000) * 1000000; - if (ts.tv_nsec >= 1000000000) { - ts.tv_sec++; - ts.tv_nsec -= 1000000000; - } - - pthread_cond_timedwait(&the_lnet.ln_cond, - &the_lnet.ln_lock, &ts); - - gettimeofday(&now, NULL); - timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 + - (now.tv_usec - then.tv_usec) / 1000; - - if (timeout_ms < 0) - timeout_ms = 0; - } -# endif -#endif - } -} diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c deleted file mode 100644 index 0e8524c8bd32eda7d76a17c29c1b13d134060f68..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-md.c +++ /dev/null @@ -1,317 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-md.c - * Memory Descriptor management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -/* must be called with LNET_LOCK held */ -void -lnet_md_unlink(lnet_libmd_t *md) -{ - if ((md->md_flags & LNET_MD_FLAG_ZOMBIE) == 0) { - /* first unlink attempt... 
*/ - lnet_me_t *me = md->md_me; - - md->md_flags |= LNET_MD_FLAG_ZOMBIE; - - /* Disassociate from ME (if any), and unlink it if it was created - * with LNET_UNLINK */ - if (me != NULL) { - me->me_md = NULL; - if (me->me_unlink == LNET_UNLINK) - lnet_me_unlink(me); - } - - /* emsure all future handle lookups fail */ - lnet_invalidate_handle(&md->md_lh); - } - - if (md->md_refcount != 0) { - CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - return; - } - - CDEBUG(D_NET, "Unlinking md %p\n", md); - - if (md->md_eq != NULL) { - md->md_eq->eq_refcount--; - LASSERT (md->md_eq->eq_refcount >= 0); - } - - list_del (&md->md_list); - lnet_md_free(md); -} - -/* must be called with LNET_LOCK held */ -static int -lib_md_build(lnet_libmd_t *lmd, lnet_md_t *umd, int unlink) -{ - lnet_eq_t *eq = NULL; - int i; - unsigned int niov; - int total_length = 0; - - /* NB we are passed an allocated, but uninitialised/active md. - * if we return success, caller may lnet_md_unlink() it. - * otherwise caller may only lnet_md_free() it. - */ - - if (!LNetHandleIsEqual (umd->eq_handle, LNET_EQ_NONE)) { - eq = lnet_handle2eq(&umd->eq_handle); - if (eq == NULL) - return -ENOENT; - } - - /* This implementation doesn't know how to create START events or - * disable END events. Best to LASSERT our caller is compliant so - * we find out quickly... */ - /* TODO - reevaluate what should be here in light of - * the removal of the start and end events - * maybe there we shouldn't even allow LNET_EQ_NONE!) - LASSERT (eq == NULL); - */ - - lmd->md_me = NULL; - lmd->md_start = umd->start; - lmd->md_offset = 0; - lmd->md_max_size = umd->max_size; - lmd->md_options = umd->options; - lmd->md_user_ptr = umd->user_ptr; - lmd->md_eq = eq; - lmd->md_threshold = umd->threshold; - lmd->md_refcount = 0; - lmd->md_flags = (unlink == LNET_UNLINK) ? 
LNET_MD_FLAG_AUTO_UNLINK : 0; - - if ((umd->options & LNET_MD_IOVEC) != 0) { - - if ((umd->options & LNET_MD_KIOV) != 0) /* Can't specify both */ - return -EINVAL; - - lmd->md_niov = niov = umd->length; - memcpy(lmd->md_iov.iov, umd->start, - niov * sizeof (lmd->md_iov.iov[0])); - - for (i = 0; i < niov; i++) { - /* We take the base address on trust */ - if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */ - return -EINVAL; - - total_length += lmd->md_iov.iov[i].iov_len; - } - - lmd->md_length = total_length; - - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > total_length)) // illegal max_size - return -EINVAL; - - } else if ((umd->options & LNET_MD_KIOV) != 0) { -#ifndef __KERNEL__ - return -EINVAL; -#else - lmd->md_niov = niov = umd->length; - memcpy(lmd->md_iov.kiov, umd->start, - niov * sizeof (lmd->md_iov.kiov[0])); - - for (i = 0; i < niov; i++) { - /* We take the page pointer on trust */ - if (lmd->md_iov.kiov[i].kiov_offset + - lmd->md_iov.kiov[i].kiov_len > CFS_PAGE_SIZE ) - return -EINVAL; /* invalid length */ - - total_length += lmd->md_iov.kiov[i].kiov_len; - } - - lmd->md_length = total_length; - - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > total_length)) // illegal max_size - return -EINVAL; -#endif - } else { /* contiguous */ - lmd->md_length = umd->length; - lmd->md_niov = niov = 1; - lmd->md_iov.iov[0].iov_base = umd->start; - lmd->md_iov.iov[0].iov_len = umd->length; - - if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || - umd->max_size > umd->length)) // illegal max_size - return -EINVAL; - } - - if (eq != NULL) - eq->eq_refcount++; - - /* It's good; let handle2md succeed and add to active mds */ - lnet_initialise_handle (&lmd->md_lh, LNET_COOKIE_TYPE_MD); - list_add (&lmd->md_list, &the_lnet.ln_active_mds); - - return 0; -} - -/* must be called with LNET_LOCK held */ -void 
-lnet_md_deconstruct(lnet_libmd_t *lmd, lnet_md_t *umd) -{ - /* NB this doesn't copy out all the iov entries so when a - * discontiguous MD is copied out, the target gets to know the - * original iov pointer (in start) and the number of entries it had - * and that's all. - */ - umd->start = lmd->md_start; - umd->length = ((lmd->md_options & (LNET_MD_IOVEC | LNET_MD_KIOV)) == 0) ? - lmd->md_length : lmd->md_niov; - umd->threshold = lmd->md_threshold; - umd->max_size = lmd->md_max_size; - umd->options = lmd->md_options; - umd->user_ptr = lmd->md_user_ptr; - lnet_eq2handle(&umd->eq_handle, lmd->md_eq); -} - -int -LNetMDAttach(lnet_handle_me_t meh, lnet_md_t umd, - lnet_unlink_t unlink, lnet_handle_md_t *handle) -{ - lnet_me_t *me; - lnet_libmd_t *md; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 && - umd.length > LNET_MAX_IOV) /* too many fragments */ - return -EINVAL; - - md = lnet_md_alloc(&umd); - if (md == NULL) - return -ENOMEM; - - LNET_LOCK(); - - me = lnet_handle2me(&meh); - if (me == NULL) { - rc = -ENOENT; - } else if (me->me_md != NULL) { - rc = -EBUSY; - } else { - rc = lib_md_build(md, &umd, unlink); - if (rc == 0) { - me->me_md = md; - md->md_me = me; - - lnet_md2handle(handle, md); - - /* check if this MD matches any blocked msgs */ - lnet_match_blocked_msg(md); /* expects LNET_LOCK held */ - - LNET_UNLOCK(); - return (0); - } - } - - lnet_md_free (md); - - LNET_UNLOCK(); - return (rc); -} - -int -LNetMDBind(lnet_md_t umd, lnet_unlink_t unlink, lnet_handle_md_t *handle) -{ - lnet_libmd_t *md; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if ((umd.options & (LNET_MD_KIOV | LNET_MD_IOVEC)) != 0 && - umd.length > LNET_MAX_IOV) /* too many fragments */ - return -EINVAL; - - md = lnet_md_alloc(&umd); - if (md == NULL) - return -ENOMEM; - - LNET_LOCK(); - - rc = lib_md_build(md, &umd, unlink); - - if (rc == 0) { - 
lnet_md2handle(handle, md); - - LNET_UNLOCK(); - return (0); - } - - lnet_md_free (md); - - LNET_UNLOCK(); - return (rc); -} - -int -LNetMDUnlink (lnet_handle_md_t mdh) -{ - lnet_event_t ev; - lnet_libmd_t *md; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - md = lnet_handle2md(&mdh); - if (md == NULL) { - LNET_UNLOCK(); - return -ENOENT; - } - - /* If the MD is busy, lnet_md_unlink just marks it for deletion, and - * when the NAL is done, the completion event flags that the MD was - * unlinked. Otherwise, we enqueue an event now... */ - - if (md->md_eq != NULL && - md->md_refcount == 0) { - memset(&ev, 0, sizeof(ev)); - - ev.type = LNET_EVENT_UNLINK; - ev.status = 0; - ev.unlinked = 1; - lnet_md_deconstruct(md, &ev.md); - lnet_md2handle(&ev.md_handle, md); - - lnet_enq_event_locked(md->md_eq, &ev); - } - - lnet_md_unlink(md); - - LNET_UNLOCK(); - return 0; -} - diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c deleted file mode 100644 index fb72c6d7d1d8d8938906cde8b9234b4905d136ed..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-me.c +++ /dev/null @@ -1,173 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-me.c - * Match Entry management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -int -LNetMEAttach(unsigned int portal, - lnet_process_id_t match_id, - __u64 match_bits, __u64 ignore_bits, - lnet_unlink_t unlink, lnet_ins_pos_t pos, - lnet_handle_me_t *handle) -{ - lnet_me_t *me; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (portal >= the_lnet.ln_nportals) - return -EINVAL; - - me = lnet_me_alloc(); - if (me == NULL) - return -ENOMEM; - - LNET_LOCK(); - - me->me_portal = portal; - me->me_match_id = match_id; - me->me_match_bits = match_bits; - me->me_ignore_bits = ignore_bits; - me->me_unlink = unlink; - me->me_md = NULL; - - lnet_initialise_handle (&me->me_lh, LNET_COOKIE_TYPE_ME); - - if (pos == LNET_INS_AFTER) - list_add_tail(&me->me_list, &(the_lnet.ln_portals[portal].ptl_ml)); - else - list_add(&me->me_list, &(the_lnet.ln_portals[portal].ptl_ml)); - - lnet_me2handle(handle, me); - - LNET_UNLOCK(); - - return 0; -} - -int -LNetMEInsert(lnet_handle_me_t current_meh, - lnet_process_id_t match_id, - __u64 match_bits, __u64 ignore_bits, - lnet_unlink_t unlink, lnet_ins_pos_t pos, - lnet_handle_me_t *handle) -{ - lnet_me_t *current_me; - lnet_me_t *new_me; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - new_me = lnet_me_alloc(); - if (new_me == NULL) - return -ENOMEM; - - LNET_LOCK(); - - current_me = lnet_handle2me(¤t_meh); - if (current_me == NULL) { - lnet_me_free (new_me); - - LNET_UNLOCK(); - return -ENOENT; - } - - new_me->me_match_id = match_id; - new_me->me_match_bits = match_bits; - new_me->me_ignore_bits = ignore_bits; - new_me->me_unlink = unlink; - new_me->me_md = NULL; - - lnet_initialise_handle (&new_me->me_lh, LNET_COOKIE_TYPE_ME); - - if (pos == LNET_INS_AFTER) - list_add_tail(&new_me->me_list, ¤t_me->me_list); 
- else - list_add(&new_me->me_list, ¤t_me->me_list); - - lnet_me2handle(handle, new_me); - - LNET_UNLOCK(); - - return 0; -} - -int -LNetMEUnlink(lnet_handle_me_t meh) -{ - lnet_me_t *me; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - me = lnet_handle2me(&meh); - if (me == NULL) { - rc = -ENOENT; - } else { - lnet_me_unlink(me); - rc = 0; - } - - LNET_UNLOCK(); - - return (rc); -} - -/* call with LNET_LOCK please */ -void -lnet_me_unlink(lnet_me_t *me) -{ - list_del (&me->me_list); - - if (me->me_md) { - me->me_md->md_me = NULL; - lnet_md_unlink(me->me_md); - } - - lnet_invalidate_handle (&me->me_lh); - lnet_me_free(me); -} - -#if 0 -static void -lib_me_dump(lnet_me_t *me) -{ - CWARN("Match Entry %p ("LPX64")\n", me, - me->me_lh.lh_cookie); - - CWARN("\tMatch/Ignore\t= %016lx / %016lx\n", - me->me_match_bits, me->me_ignore_bits); - - CWARN("\tMD\t= %p\n", me->md); - CWARN("\tprev\t= %p\n", - list_entry(me->me_list.prev, lnet_me_t, me_list)); - CWARN("\tnext\t= %p\n", - list_entry(me->me_list.next, lnet_me_t, me_list)); -} -#endif diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c deleted file mode 100644 index f598c0ba8c740635bca6e01d9d0fe8ba75638786..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-move.c +++ /dev/null @@ -1,2575 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-move.c - * Data movement routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -static int local_nid_dist_zero = 1; -CFS_MODULE_PARM(local_nid_dist_zero, "i", int, 0444, - "Reserved"); - -/* forward ref */ -static void lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg); -static void lnet_drop_delayed_put(lnet_msg_t *msg, char *reason); - -#define LNET_MATCHMD_NONE 0 /* Didn't match */ -#define LNET_MATCHMD_OK 1 /* Matched OK */ -#define LNET_MATCHMD_DROP 2 /* Must be disarded */ - -static int -lnet_try_match_md (int index, int op_mask, lnet_process_id_t src, - unsigned int rlength, unsigned int roffset, - __u64 match_bits, lnet_libmd_t *md, lnet_msg_t *msg, - unsigned int *mlength_out, unsigned int *offset_out) -{ - /* ALWAYS called holding the LNET_LOCK, and can't LNET_UNLOCK; - * lnet_match_blocked_msg() relies on this to avoid races */ - unsigned int offset; - unsigned int mlength; - lnet_me_t *me = md->md_me; - - /* mismatched MD op */ - if ((md->md_options & op_mask) == 0) - return LNET_MATCHMD_NONE; - - /* MD exhausted */ - if (lnet_md_exhausted(md)) - return LNET_MATCHMD_NONE; - - /* mismatched ME nid/pid? */ - if (me->me_match_id.nid != LNET_NID_ANY && - me->me_match_id.nid != src.nid) - return LNET_MATCHMD_NONE; - - if (me->me_match_id.pid != LNET_PID_ANY && - me->me_match_id.pid != src.pid) - return LNET_MATCHMD_NONE; - - /* mismatched ME matchbits? */ - if (((me->me_match_bits ^ match_bits) & ~me->me_ignore_bits) != 0) - return LNET_MATCHMD_NONE; - - /* Hurrah! This _is_ a match; check it out... 
*/ - - if ((md->md_options & LNET_MD_MANAGE_REMOTE) == 0) - offset = md->md_offset; - else - offset = roffset; - - if ((md->md_options & LNET_MD_MAX_SIZE) != 0) { - mlength = md->md_max_size; - LASSERT (md->md_offset + mlength <= md->md_length); - } else { - mlength = md->md_length - offset; - } - - if (rlength <= mlength) { /* fits in allowed space */ - mlength = rlength; - } else if ((md->md_options & LNET_MD_TRUNCATE) == 0) { - /* this packet _really_ is too big */ - CERROR("Matching packet from %s, match "LPU64 - " length %d too big: %d left, %d allowed\n", - libcfs_id2str(src), match_bits, rlength, - md->md_length - offset, mlength); - - return LNET_MATCHMD_DROP; - } - - /* Commit to this ME/MD */ - CDEBUG(D_NET, "Incoming %s index %x from %s of " - "length %d/%d into md "LPX64" [%d] + %d\n", - (op_mask == LNET_MD_OP_PUT) ? "put" : "get", - index, libcfs_id2str(src), mlength, rlength, - md->md_lh.lh_cookie, md->md_niov, offset); - - lnet_commit_md(md, msg); - md->md_offset = offset + mlength; - - /* NB Caller will set ev.type and ev.hdr_data */ - msg->msg_ev.initiator = src; - msg->msg_ev.pt_index = index; - msg->msg_ev.match_bits = match_bits; - msg->msg_ev.rlength = rlength; - msg->msg_ev.mlength = mlength; - msg->msg_ev.offset = offset; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - *offset_out = offset; - *mlength_out = mlength; - - /* Auto-unlink NOW, so the ME gets unlinked if required. - * We bumped md->md_refcount above so the MD just gets flagged - * for unlink when it is finalized. 
*/ - if ((md->md_flags & LNET_MD_FLAG_AUTO_UNLINK) != 0 && - lnet_md_exhausted(md)) { - lnet_md_unlink(md); - } - - return LNET_MATCHMD_OK; -} - -static int -lnet_match_md(int index, int op_mask, lnet_process_id_t src, - unsigned int rlength, unsigned int roffset, - __u64 match_bits, lnet_msg_t *msg, - unsigned int *mlength_out, unsigned int *offset_out, - lnet_libmd_t **md_out) -{ - lnet_portal_t *ptl = &the_lnet.ln_portals[index]; - struct list_head *tmp; - lnet_me_t *me; - lnet_libmd_t *md; - int rc; - - CDEBUG (D_NET, "Request from %s of length %d into portal %d " - "MB="LPX64"\n", libcfs_id2str(src), rlength, index, match_bits); - - if (index < 0 || index >= the_lnet.ln_nportals) { - CERROR("Invalid portal %d not in [0-%d]\n", - index, the_lnet.ln_nportals); - return LNET_MATCHMD_DROP; - } - - list_for_each (tmp, &ptl->ptl_ml) { - me = list_entry(tmp, lnet_me_t, me_list); - md = me->me_md; - - /* ME attached but MD not attached yet */ - if (md == NULL) - continue; - - LASSERT (me == md->md_me); - - rc = lnet_try_match_md(index, op_mask, src, rlength, - roffset, match_bits, md, msg, - mlength_out, offset_out); - switch (rc) { - default: - LBUG(); - - case LNET_MATCHMD_NONE: - continue; - - case LNET_MATCHMD_OK: - *md_out = md; - return LNET_MATCHMD_OK; - - case LNET_MATCHMD_DROP: - return LNET_MATCHMD_DROP; - } - /* not reached */ - } - - if (op_mask == LNET_MD_OP_GET || - (ptl->ptl_options & LNET_PTL_LAZY) == 0) - return LNET_MATCHMD_DROP; - - return LNET_MATCHMD_NONE; -} - -int -lnet_fail_nid (lnet_nid_t nid, unsigned int threshold) -{ - lnet_test_peer_t *tp; - struct list_head *el; - struct list_head *next; - struct list_head cull; - - LASSERT (the_lnet.ln_init); - - if (threshold != 0) { - /* Adding a new entry */ - LIBCFS_ALLOC(tp, sizeof(*tp)); - if (tp == NULL) - return -ENOMEM; - - tp->tp_nid = nid; - tp->tp_threshold = threshold; - - LNET_LOCK(); - list_add_tail (&tp->tp_list, &the_lnet.ln_test_peers); - LNET_UNLOCK(); - return 0; - } - - /* removing 
entries */ - CFS_INIT_LIST_HEAD (&cull); - - LNET_LOCK(); - - list_for_each_safe (el, next, &the_lnet.ln_test_peers) { - tp = list_entry (el, lnet_test_peer_t, tp_list); - - if (tp->tp_threshold == 0 || /* needs culling anyway */ - nid == LNET_NID_ANY || /* removing all entries */ - tp->tp_nid == nid) /* matched this one */ - { - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - } - - LNET_UNLOCK(); - - while (!list_empty (&cull)) { - tp = list_entry (cull.next, lnet_test_peer_t, tp_list); - - list_del (&tp->tp_list); - LIBCFS_FREE(tp, sizeof (*tp)); - } - return 0; -} - -static int -fail_peer (lnet_nid_t nid, int outgoing) -{ - lnet_test_peer_t *tp; - struct list_head *el; - struct list_head *next; - struct list_head cull; - int fail = 0; - - CFS_INIT_LIST_HEAD (&cull); - - LNET_LOCK(); - - list_for_each_safe (el, next, &the_lnet.ln_test_peers) { - tp = list_entry (el, lnet_test_peer_t, tp_list); - - if (tp->tp_threshold == 0) { - /* zombie entry */ - if (outgoing) { - /* only cull zombies on outgoing tests, - * since we may be at interrupt priority on - * incoming messages. 
*/ - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - continue; - } - - if (tp->tp_nid == LNET_NID_ANY || /* fail every peer */ - nid == tp->tp_nid) { /* fail this peer */ - fail = 1; - - if (tp->tp_threshold != LNET_MD_THRESH_INF) { - tp->tp_threshold--; - if (outgoing && - tp->tp_threshold == 0) { - /* see above */ - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - } - break; - } - } - - LNET_UNLOCK (); - - while (!list_empty (&cull)) { - tp = list_entry (cull.next, lnet_test_peer_t, tp_list); - list_del (&tp->tp_list); - - LIBCFS_FREE(tp, sizeof (*tp)); - } - - return (fail); -} - -unsigned int -lnet_iov_nob (unsigned int niov, struct iovec *iov) -{ - unsigned int nob = 0; - - while (niov-- > 0) - nob += (iov++)->iov_len; - - return (nob); -} - -void -lnet_copy_iov2iov (unsigned int ndiov, struct iovec *diov, unsigned int doffset, - unsigned int nsiov, struct iovec *siov, unsigned int soffset, - unsigned int nob) -{ - /* NB diov, siov are READ-ONLY */ - unsigned int this_nob; - - if (nob == 0) - return; - - /* skip complete frags before 'doffset' */ - LASSERT (ndiov > 0); - while (doffset >= diov->iov_len) { - doffset -= diov->iov_len; - diov++; - ndiov--; - LASSERT (ndiov > 0); - } - - /* skip complete frags before 'soffset' */ - LASSERT (nsiov > 0); - while (soffset >= siov->iov_len) { - soffset -= siov->iov_len; - siov++; - nsiov--; - LASSERT (nsiov > 0); - } - - do { - LASSERT (ndiov > 0); - LASSERT (nsiov > 0); - this_nob = MIN(diov->iov_len - doffset, - siov->iov_len - soffset); - this_nob = MIN(this_nob, nob); - - memcpy ((char *)diov->iov_base + doffset, - (char *)siov->iov_base + soffset, this_nob); - nob -= this_nob; - - if (diov->iov_len > doffset + this_nob) { - doffset += this_nob; - } else { - diov++; - ndiov--; - doffset = 0; - } - - if (siov->iov_len > soffset + this_nob) { - soffset += this_nob; - } else { - siov++; - nsiov--; - soffset = 0; - } - } while (nob > 0); -} - -int -lnet_extract_iov (int dst_niov, 
struct iovec *dst, - int src_niov, struct iovec *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return (niov); - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} - -#ifndef __KERNEL__ -unsigned int -lnet_kiov_nob (unsigned int niov, lnet_kiov_t *kiov) -{ - LASSERT (0); - return (0); -} - -void -lnet_copy_kiov2kiov (unsigned int ndkiov, lnet_kiov_t *dkiov, unsigned int doffset, - unsigned int nskiov, lnet_kiov_t *skiov, unsigned int soffset, - unsigned int nob) -{ - LASSERT (0); -} - -void -lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, unsigned int iovoffset, - unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset, - unsigned int nob) -{ - LASSERT (0); -} - -void -lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset, - unsigned int niov, struct iovec *iov, unsigned int iovoffset, - unsigned int nob) -{ - LASSERT (0); -} - -int -lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len) -{ - LASSERT (0); -} - -#else /* __KERNEL__ */ - -unsigned int -lnet_kiov_nob (unsigned int niov, lnet_kiov_t *kiov) -{ - unsigned int nob = 0; - - while (niov-- > 0) - nob += (kiov++)->kiov_len; - - return (nob); -} - -void -lnet_copy_kiov2kiov 
(unsigned int ndiov, lnet_kiov_t *diov, unsigned int doffset, - unsigned int nsiov, lnet_kiov_t *siov, unsigned int soffset, - unsigned int nob) -{ - /* NB diov, siov are READ-ONLY */ - unsigned int this_nob; - char *daddr = NULL; - char *saddr = NULL; - - if (nob == 0) - return; - - LASSERT (!in_interrupt ()); - - LASSERT (ndiov > 0); - while (doffset > diov->kiov_len) { - doffset -= diov->kiov_len; - diov++; - ndiov--; - LASSERT (ndiov > 0); - } - - LASSERT (nsiov > 0); - while (soffset > siov->kiov_len) { - soffset -= siov->kiov_len; - siov++; - nsiov--; - LASSERT (nsiov > 0); - } - - do { - LASSERT (ndiov > 0); - LASSERT (nsiov > 0); - this_nob = MIN(diov->kiov_len - doffset, - siov->kiov_len - soffset); - this_nob = MIN(this_nob, nob); - - if (daddr == NULL) - daddr = ((char *)cfs_kmap(diov->kiov_page)) + - diov->kiov_offset + doffset; - if (saddr == NULL) - saddr = ((char *)cfs_kmap(siov->kiov_page)) + - siov->kiov_offset + soffset; - - /* Vanishing risk of kmap deadlock when mapping 2 pages. 
- * However in practice at least one of the kiovs will be mapped - * kernel pages and the map/unmap will be NOOPs */ - - memcpy (daddr, saddr, this_nob); - nob -= this_nob; - - if (diov->kiov_len > doffset + this_nob) { - daddr += this_nob; - doffset += this_nob; - } else { - cfs_kunmap(diov->kiov_page); - daddr = NULL; - diov++; - ndiov--; - doffset = 0; - } - - if (siov->kiov_len > soffset + this_nob) { - saddr += this_nob; - soffset += this_nob; - } else { - cfs_kunmap(siov->kiov_page); - saddr = NULL; - siov++; - nsiov--; - soffset = 0; - } - } while (nob > 0); - - if (daddr != NULL) - cfs_kunmap(diov->kiov_page); - if (saddr != NULL) - cfs_kunmap(siov->kiov_page); -} - -void -lnet_copy_kiov2iov (unsigned int niov, struct iovec *iov, unsigned int iovoffset, - unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset, - unsigned int nob) -{ - /* NB iov, kiov are READ-ONLY */ - unsigned int this_nob; - char *addr = NULL; - - if (nob == 0) - return; - - LASSERT (!in_interrupt ()); - - LASSERT (niov > 0); - while (iovoffset > iov->iov_len) { - iovoffset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - - LASSERT (nkiov > 0); - while (kiovoffset > kiov->kiov_len) { - kiovoffset -= kiov->kiov_len; - kiov++; - nkiov--; - LASSERT (nkiov > 0); - } - - do { - LASSERT (niov > 0); - LASSERT (nkiov > 0); - this_nob = MIN(iov->iov_len - iovoffset, - kiov->kiov_len - kiovoffset); - this_nob = MIN(this_nob, nob); - - if (addr == NULL) - addr = ((char *)cfs_kmap(kiov->kiov_page)) + - kiov->kiov_offset + kiovoffset; - - memcpy ((char *)iov->iov_base + iovoffset, addr, this_nob); - nob -= this_nob; - - if (iov->iov_len > iovoffset + this_nob) { - iovoffset += this_nob; - } else { - iov++; - niov--; - iovoffset = 0; - } - - if (kiov->kiov_len > kiovoffset + this_nob) { - addr += this_nob; - kiovoffset += this_nob; - } else { - cfs_kunmap(kiov->kiov_page); - addr = NULL; - kiov++; - nkiov--; - kiovoffset = 0; - } - - } while (nob > 0); - - if (addr != NULL) - 
cfs_kunmap(kiov->kiov_page); -} - -void -lnet_copy_iov2kiov (unsigned int nkiov, lnet_kiov_t *kiov, unsigned int kiovoffset, - unsigned int niov, struct iovec *iov, unsigned int iovoffset, - unsigned int nob) -{ - /* NB kiov, iov are READ-ONLY */ - unsigned int this_nob; - char *addr = NULL; - - if (nob == 0) - return; - - LASSERT (!in_interrupt ()); - - LASSERT (nkiov > 0); - while (kiovoffset > kiov->kiov_len) { - kiovoffset -= kiov->kiov_len; - kiov++; - nkiov--; - LASSERT (nkiov > 0); - } - - LASSERT (niov > 0); - while (iovoffset > iov->iov_len) { - iovoffset -= iov->iov_len; - iov++; - niov--; - LASSERT (niov > 0); - } - - do { - LASSERT (nkiov > 0); - LASSERT (niov > 0); - this_nob = MIN(kiov->kiov_len - kiovoffset, - iov->iov_len - iovoffset); - this_nob = MIN(this_nob, nob); - - if (addr == NULL) - addr = ((char *)cfs_kmap(kiov->kiov_page)) + - kiov->kiov_offset + kiovoffset; - - memcpy (addr, (char *)iov->iov_base + iovoffset, this_nob); - nob -= this_nob; - - if (kiov->kiov_len > kiovoffset + this_nob) { - addr += this_nob; - kiovoffset += this_nob; - } else { - cfs_kunmap(kiov->kiov_page); - addr = NULL; - kiov++; - nkiov--; - kiovoffset = 0; - } - - if (iov->iov_len > iovoffset + this_nob) { - iovoffset += this_nob; - } else { - iov++; - niov--; - iovoffset = 0; - } - } while (nob > 0); - - if (addr != NULL) - cfs_kunmap(kiov->kiov_page); -} - -int -lnet_extract_kiov (int dst_niov, lnet_kiov_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. 
- * NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->kiov_len) { /* skip initial frags */ - offset -= src->kiov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = src->kiov_len - offset; - dst->kiov_page = src->kiov_page; - dst->kiov_offset = src->kiov_offset + offset; - - if (len <= frag_len) { - dst->kiov_len = len; - LASSERT (dst->kiov_offset + dst->kiov_len <= CFS_PAGE_SIZE); - return (niov); - } - - dst->kiov_len = frag_len; - LASSERT (dst->kiov_offset + dst->kiov_len <= CFS_PAGE_SIZE); - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -#endif - -void -lnet_ni_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - unsigned int niov = 0; - struct iovec *iov = NULL; - lnet_kiov_t *kiov = NULL; - int rc; - - LASSERT (!in_interrupt ()); - LASSERT (mlen == 0 || msg != NULL); - - if (msg != NULL) { - LASSERT(msg->msg_receiving); - LASSERT(!msg->msg_sending); - LASSERT(rlen == msg->msg_len); - LASSERT(mlen <= msg->msg_len); - - msg->msg_wanted = mlen; - msg->msg_offset = offset; - msg->msg_receiving = 0; - - if (mlen != 0) { - niov = msg->msg_niov; - iov = msg->msg_iov; - kiov = msg->msg_kiov; - - LASSERT (niov > 0); - LASSERT ((iov == NULL) != (kiov == NULL)); - } - } - - rc = (ni->ni_lnd->lnd_recv)(ni, private, msg, delayed, - niov, iov, kiov, offset, mlen, rlen); - if (rc < 0) - lnet_finalize(ni, msg, rc); -} - -int -lnet_compare_routers(lnet_peer_t *p1, lnet_peer_t *p2) -{ - if (p1->lp_txqnob < p2->lp_txqnob) - return 1; - - if (p1->lp_txqnob > p2->lp_txqnob) - return -1; - - if (p1->lp_txcredits > p2->lp_txcredits) - return 1; - - if (p1->lp_txcredits < p2->lp_txcredits) - return -1; - - return 0; -} - - -void 
-lnet_setpayloadbuffer(lnet_msg_t *msg) -{ - lnet_libmd_t *md = msg->msg_md; - - LASSERT (msg->msg_len > 0); - LASSERT (!msg->msg_routing); - LASSERT (md != NULL); - LASSERT (msg->msg_niov == 0); - LASSERT (msg->msg_iov == NULL); - LASSERT (msg->msg_kiov == NULL); - - msg->msg_niov = md->md_niov; - if ((md->md_options & LNET_MD_KIOV) != 0) - msg->msg_kiov = md->md_iov.kiov; - else - msg->msg_iov = md->md_iov.iov; -} - -void -lnet_prep_send(lnet_msg_t *msg, int type, lnet_process_id_t target, - unsigned int offset, unsigned int len) -{ - msg->msg_type = type; - msg->msg_target = target; - msg->msg_len = len; - msg->msg_offset = offset; - - if (len != 0) - lnet_setpayloadbuffer(msg); - - memset (&msg->msg_hdr, 0, sizeof (msg->msg_hdr)); - msg->msg_hdr.type = cpu_to_le32(type); - msg->msg_hdr.dest_nid = cpu_to_le64(target.nid); - msg->msg_hdr.dest_pid = cpu_to_le32(target.pid); - /* src_nid will be set later */ - msg->msg_hdr.src_pid = cpu_to_le32(the_lnet.ln_pid); - msg->msg_hdr.payload_length = cpu_to_le32(len); -} - -void -lnet_ni_send(lnet_ni_t *ni, lnet_msg_t *msg) -{ - void *priv = msg->msg_private; - int rc; - - LASSERT (!in_interrupt ()); - LASSERT (LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) == LOLND || - (msg->msg_txcredit && msg->msg_peertxcredit)); - - rc = (ni->ni_lnd->lnd_send)(ni, priv, msg); - if (rc < 0) - lnet_finalize(ni, msg, rc); -} - -int -lnet_eager_recv_locked(lnet_msg_t *msg) -{ - lnet_peer_t *peer; - lnet_ni_t *ni; - int rc = 0; - - LASSERT (!msg->msg_delayed); - msg->msg_delayed = 1; - - LASSERT (msg->msg_receiving); - LASSERT (!msg->msg_sending); - - peer = msg->msg_rxpeer; - ni = peer->lp_ni; - - if (ni->ni_lnd->lnd_eager_recv != NULL) { - LNET_UNLOCK(); - - rc = (ni->ni_lnd->lnd_eager_recv)(ni, msg->msg_private, msg, - &msg->msg_private); - if (rc != 0) { - CERROR("recv from %s / send to %s aborted: " - "eager_recv failed %d\n", - libcfs_nid2str(peer->lp_nid), - libcfs_id2str(msg->msg_target), rc); - LASSERT (rc < 0); /* required by my callers 
*/ - } - - LNET_LOCK(); - } - - return rc; -} - -int -lnet_post_send_locked (lnet_msg_t *msg, int do_send) -{ - /* lnet_send is going to LNET_UNLOCK immediately after this, so it sets - * do_send FALSE and I don't do the unlock/send/lock bit. I return - * EAGAIN if msg blocked and 0 if sent or OK to send */ - lnet_peer_t *lp = msg->msg_txpeer; - lnet_ni_t *ni = lp->lp_ni; - - /* non-lnet_send() callers have checked before */ - LASSERT (!do_send || msg->msg_delayed); - LASSERT (!msg->msg_receiving); - - if (!msg->msg_peertxcredit) { - LASSERT ((lp->lp_txcredits < 0) == !list_empty(&lp->lp_txq)); - - msg->msg_peertxcredit = 1; - lp->lp_txqnob += msg->msg_len + sizeof(lnet_hdr_t); - lp->lp_txcredits--; - - if (lp->lp_txcredits < lp->lp_mintxcredits) - lp->lp_mintxcredits = lp->lp_txcredits; - - if (lp->lp_txcredits < 0) { - msg->msg_delayed = 1; - list_add_tail (&msg->msg_list, &lp->lp_txq); - return EAGAIN; - } - } - - if (!msg->msg_txcredit) { - LASSERT ((ni->ni_txcredits < 0) == !list_empty(&ni->ni_txq)); - - msg->msg_txcredit = 1; - ni->ni_txcredits--; - - if (ni->ni_txcredits < ni->ni_mintxcredits) - ni->ni_mintxcredits = ni->ni_txcredits; - - if (ni->ni_txcredits < 0) { - msg->msg_delayed = 1; - list_add_tail (&msg->msg_list, &ni->ni_txq); - return EAGAIN; - } - } - - if (do_send) { - LNET_UNLOCK(); - lnet_ni_send(ni, msg); - LNET_LOCK(); - } - return 0; -} - -#ifdef __KERNEL__ -static void -lnet_commit_routedmsg (lnet_msg_t *msg) -{ - /* ALWAYS called holding the LNET_LOCK */ - LASSERT (msg->msg_routing); - - the_lnet.ln_counters.msgs_alloc++; - if (the_lnet.ln_counters.msgs_alloc > - the_lnet.ln_counters.msgs_max) - the_lnet.ln_counters.msgs_max = - the_lnet.ln_counters.msgs_alloc; - - the_lnet.ln_counters.route_count++; - the_lnet.ln_counters.route_length += msg->msg_len; - - LASSERT (!msg->msg_onactivelist); - msg->msg_onactivelist = 1; - list_add (&msg->msg_activelist, &the_lnet.ln_active_msgs); -} - -lnet_rtrbufpool_t * -lnet_msg2bufpool(lnet_msg_t *msg) 
-{ - lnet_rtrbufpool_t *rbp = &the_lnet.ln_rtrpools[0]; - - LASSERT (msg->msg_len <= LNET_MTU); - while (msg->msg_len > rbp->rbp_npages * CFS_PAGE_SIZE) { - rbp++; - LASSERT (rbp < &the_lnet.ln_rtrpools[LNET_NRBPOOLS]); - } - - return rbp; -} - -int -lnet_post_routed_recv_locked (lnet_msg_t *msg, int do_recv) -{ - /* lnet_parse is going to LNET_UNLOCK immediately after this, so it - * sets do_recv FALSE and I don't do the unlock/send/lock bit. I - * return EAGAIN if msg blocked and 0 if sent or OK to send */ - lnet_peer_t *lp = msg->msg_rxpeer; - lnet_rtrbufpool_t *rbp; - lnet_rtrbuf_t *rb; - - LASSERT (msg->msg_iov == NULL); - LASSERT (msg->msg_kiov == NULL); - LASSERT (msg->msg_niov == 0); - LASSERT (msg->msg_routing); - LASSERT (msg->msg_receiving); - LASSERT (!msg->msg_sending); - - /* non-lnet_parse callers only send delayed messages */ - LASSERT (!do_recv || msg->msg_delayed); - - if (!msg->msg_peerrtrcredit) { - LASSERT ((lp->lp_rtrcredits < 0) == !list_empty(&lp->lp_rtrq)); - - msg->msg_peerrtrcredit = 1; - lp->lp_rtrcredits--; - if (lp->lp_rtrcredits < lp->lp_minrtrcredits) - lp->lp_minrtrcredits = lp->lp_rtrcredits; - - if (lp->lp_rtrcredits < 0) { - /* must have checked eager_recv before here */ - LASSERT (msg->msg_delayed); - list_add_tail(&msg->msg_list, &lp->lp_rtrq); - return EAGAIN; - } - } - - rbp = lnet_msg2bufpool(msg); - - if (!msg->msg_rtrcredit) { - LASSERT ((rbp->rbp_credits < 0) == !list_empty(&rbp->rbp_msgs)); - - msg->msg_rtrcredit = 1; - rbp->rbp_credits--; - if (rbp->rbp_credits < rbp->rbp_mincredits) - rbp->rbp_mincredits = rbp->rbp_credits; - - if (rbp->rbp_credits < 0) { - /* must have checked eager_recv before here */ - LASSERT (msg->msg_delayed); - list_add_tail(&msg->msg_list, &rbp->rbp_msgs); - return EAGAIN; - } - } - - LASSERT (!list_empty(&rbp->rbp_bufs)); - rb = list_entry(rbp->rbp_bufs.next, lnet_rtrbuf_t, rb_list); - list_del(&rb->rb_list); - - msg->msg_niov = rbp->rbp_npages; - msg->msg_kiov = &rb->rb_kiov[0]; - - if 
(do_recv) { - LNET_UNLOCK(); - lnet_ni_recv(lp->lp_ni, msg->msg_private, msg, 1, - 0, msg->msg_len, msg->msg_len); - LNET_LOCK(); - } - return 0; -} -#endif - -void -lnet_return_credits_locked (lnet_msg_t *msg) -{ - lnet_peer_t *txpeer = msg->msg_txpeer; - lnet_peer_t *rxpeer = msg->msg_rxpeer; - lnet_msg_t *msg2; - lnet_ni_t *ni; - - if (msg->msg_txcredit) { - /* give back NI txcredits */ - msg->msg_txcredit = 0; - ni = txpeer->lp_ni; - - LASSERT((ni->ni_txcredits < 0) == !list_empty(&ni->ni_txq)); - - ni->ni_txcredits++; - if (ni->ni_txcredits <= 0) { - msg2 = list_entry(ni->ni_txq.next, lnet_msg_t, msg_list); - list_del(&msg2->msg_list); - - LASSERT(msg2->msg_txpeer->lp_ni == ni); - LASSERT(msg2->msg_delayed); - - (void) lnet_post_send_locked(msg2, 1); - } - } - - if (msg->msg_peertxcredit) { - /* give back peer txcredits */ - msg->msg_peertxcredit = 0; - - LASSERT((txpeer->lp_txcredits < 0) == !list_empty(&txpeer->lp_txq)); - - txpeer->lp_txqnob -= msg->msg_len + sizeof(lnet_hdr_t); - LASSERT (txpeer->lp_txqnob >= 0); - - txpeer->lp_txcredits++; - if (txpeer->lp_txcredits <= 0) { - msg2 = list_entry(txpeer->lp_txq.next, - lnet_msg_t, msg_list); - list_del(&msg2->msg_list); - - LASSERT (msg2->msg_txpeer == txpeer); - LASSERT (msg2->msg_delayed); - - (void) lnet_post_send_locked(msg2, 1); - } - } - - if (txpeer != NULL) { - msg->msg_txpeer = NULL; - lnet_peer_decref_locked(txpeer); - } - -#ifdef __KERNEL__ - if (msg->msg_rtrcredit) { - /* give back global router credits */ - lnet_rtrbuf_t *rb; - lnet_rtrbufpool_t *rbp; - - /* NB If a msg ever blocks for a buffer in rbp_msgs, it stays - * there until it gets one allocated, or aborts the wait - * itself */ - LASSERT (msg->msg_kiov != NULL); - - rb = list_entry(msg->msg_kiov, lnet_rtrbuf_t, rb_kiov[0]); - rbp = rb->rb_pool; - LASSERT (rbp == lnet_msg2bufpool(msg)); - - msg->msg_kiov = NULL; - msg->msg_rtrcredit = 0; - - LASSERT((rbp->rbp_credits < 0) == !list_empty(&rbp->rbp_msgs)); - LASSERT((rbp->rbp_credits > 0) 
== !list_empty(&rbp->rbp_bufs)); - - list_add(&rb->rb_list, &rbp->rbp_bufs); - rbp->rbp_credits++; - if (rbp->rbp_credits <= 0) { - msg2 = list_entry(rbp->rbp_msgs.next, - lnet_msg_t, msg_list); - list_del(&msg2->msg_list); - - (void) lnet_post_routed_recv_locked(msg2, 1); - } - } - - if (msg->msg_peerrtrcredit) { - /* give pack peer router credits */ - msg->msg_peerrtrcredit = 0; - - LASSERT((rxpeer->lp_rtrcredits < 0) == !list_empty(&rxpeer->lp_rtrq)); - - rxpeer->lp_rtrcredits++; - if (rxpeer->lp_rtrcredits <= 0) { - msg2 = list_entry(rxpeer->lp_rtrq.next, - lnet_msg_t, msg_list); - list_del(&msg2->msg_list); - - (void) lnet_post_routed_recv_locked(msg2, 1); - } - } -#else - LASSERT (!msg->msg_rtrcredit); - LASSERT (!msg->msg_peerrtrcredit); -#endif - if (rxpeer != NULL) { - msg->msg_rxpeer = NULL; - lnet_peer_decref_locked(rxpeer); - } -} - -int -lnet_send(lnet_nid_t src_nid, lnet_msg_t *msg) -{ - lnet_nid_t dst_nid = msg->msg_target.nid; - lnet_ni_t *src_ni; - lnet_ni_t *local_ni; - lnet_remotenet_t *rnet; - lnet_route_t *route; - lnet_route_t *best_route; - struct list_head *tmp; - lnet_peer_t *lp; - lnet_peer_t *lp2; - int rc; - - LASSERT (msg->msg_txpeer == NULL); - LASSERT (!msg->msg_sending); - LASSERT (!msg->msg_target_is_router); - LASSERT (!msg->msg_receiving); - - msg->msg_sending = 1; - - /* NB! ni != NULL == interface pre-determined (ACK/REPLY) */ - - LNET_LOCK(); - - if (the_lnet.ln_shutdown) { - LNET_UNLOCK(); - return -ESHUTDOWN; - } - - if (src_nid == LNET_NID_ANY) { - src_ni = NULL; - } else { - src_ni = lnet_nid2ni_locked(src_nid); - if (src_ni == NULL) { - LNET_UNLOCK(); - CERROR("Can't send to %s: src %s is not a local nid\n", - libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid)); - return -EINVAL; - } - LASSERT (!msg->msg_routing); - } - - /* Is this for someone on a local network? 
*/ - local_ni = lnet_net2ni_locked(LNET_NIDNET(dst_nid)); - - if (local_ni != NULL) { - if (src_ni == NULL) { - src_ni = local_ni; - src_nid = src_ni->ni_nid; - } else if (src_ni == local_ni) { - lnet_ni_decref_locked(local_ni); - } else { - lnet_ni_decref_locked(local_ni); - lnet_ni_decref_locked(src_ni); - LNET_UNLOCK(); - CERROR("no route to %s via from %s\n", - libcfs_nid2str(dst_nid), libcfs_nid2str(src_nid)); - return -EINVAL; - } - - LASSERT (src_nid != LNET_NID_ANY); - - if (!msg->msg_routing) { - src_nid = lnet_ptlcompat_srcnid(src_nid, dst_nid); - msg->msg_hdr.src_nid = cpu_to_le64(src_nid); - } - - if (src_ni == the_lnet.ln_loni) { - /* No send credit hassles with LOLND */ - LNET_UNLOCK(); - lnet_ni_send(src_ni, msg); - lnet_ni_decref(src_ni); - return 0; - } - - rc = lnet_nid2peer_locked(&lp, dst_nid); - lnet_ni_decref_locked(src_ni); /* lp has ref on src_ni; lose mine */ - if (rc != 0) { - LNET_UNLOCK(); - CERROR("Error %d finding peer %s\n", rc, - libcfs_nid2str(dst_nid)); - /* ENOMEM or shutting down */ - return rc; - } - LASSERT (lp->lp_ni == src_ni); - } else { - /* sending to a remote network */ - rnet = lnet_find_net_locked(LNET_NIDNET(dst_nid)); - if (rnet == NULL) { - if (src_ni != NULL) - lnet_ni_decref_locked(src_ni); - LNET_UNLOCK(); - CERROR("No route to %s\n", libcfs_id2str(msg->msg_target)); - return -EHOSTUNREACH; - } - - /* Find the best gateway I can use */ - lp = NULL; - best_route = NULL; - list_for_each(tmp, &rnet->lrn_routes) { - route = list_entry(tmp, lnet_route_t, lr_list); - lp2 = route->lr_gateway; - - if (lp2->lp_alive && - (src_ni == NULL || lp2->lp_ni == src_ni) && - (lp == NULL || lnet_compare_routers(lp2, lp) > 0)) { - best_route = route; - lp = lp2; - } - } - - if (lp == NULL) { - if (src_ni != NULL) - lnet_ni_decref_locked(src_ni); - LNET_UNLOCK(); - CERROR("No route to %s (all routers down)\n", - libcfs_id2str(msg->msg_target)); - return -EHOSTUNREACH; - } - - /* Place selected route at the end of the route list to 
ensure - * fairness; everything else being equal... */ - list_del(&best_route->lr_list); - list_add_tail(&best_route->lr_list, &rnet->lrn_routes); - - if (src_ni == NULL) { - src_ni = lp->lp_ni; - src_nid = src_ni->ni_nid; - } else { - LASSERT (src_ni == lp->lp_ni); - lnet_ni_decref_locked(src_ni); - } - - lnet_peer_addref_locked(lp); - - LASSERT (src_nid != LNET_NID_ANY); - - if (!msg->msg_routing) { - /* I'm the source and now I know which NI to send on */ - src_nid = lnet_ptlcompat_srcnid(src_nid, dst_nid); - msg->msg_hdr.src_nid = cpu_to_le64(src_nid); - } - - msg->msg_target_is_router = 1; - msg->msg_target.nid = lp->lp_nid; - msg->msg_target.pid = LUSTRE_SRV_LNET_PID; - } - - /* 'lp' is our best choice of peer */ - - LASSERT (!msg->msg_peertxcredit); - LASSERT (!msg->msg_txcredit); - LASSERT (msg->msg_txpeer == NULL); - - msg->msg_txpeer = lp; /* msg takes my ref on lp */ - - rc = lnet_post_send_locked(msg, 0); - LNET_UNLOCK(); - - if (rc == 0) - lnet_ni_send(src_ni, msg); - - return 0; -} - -static void -lnet_commit_md (lnet_libmd_t *md, lnet_msg_t *msg) -{ - /* ALWAYS called holding the LNET_LOCK */ - /* Here, we commit the MD to a network OP by marking it busy and - * decrementing its threshold. Come what may, the network "owns" - * the MD until a call to lnet_finalize() signals completion. 
*/ - LASSERT (!msg->msg_routing); - - msg->msg_md = md; - - md->md_refcount++; - if (md->md_threshold != LNET_MD_THRESH_INF) { - LASSERT (md->md_threshold > 0); - md->md_threshold--; - } - - the_lnet.ln_counters.msgs_alloc++; - if (the_lnet.ln_counters.msgs_alloc > - the_lnet.ln_counters.msgs_max) - the_lnet.ln_counters.msgs_max = - the_lnet.ln_counters.msgs_alloc; - - LASSERT (!msg->msg_onactivelist); - msg->msg_onactivelist = 1; - list_add (&msg->msg_activelist, &the_lnet.ln_active_msgs); -} - -static void -lnet_drop_message (lnet_ni_t *ni, void *private, unsigned int nob) -{ - LNET_LOCK(); - the_lnet.ln_counters.drop_count++; - the_lnet.ln_counters.drop_length += nob; - LNET_UNLOCK(); - - lnet_ni_recv(ni, private, NULL, 0, 0, 0, nob); -} - -static void -lnet_drop_delayed_put(lnet_msg_t *msg, char *reason) -{ - LASSERT (msg->msg_md == NULL); - LASSERT (msg->msg_delayed); - LASSERT (msg->msg_rxpeer != NULL); - LASSERT (msg->msg_hdr.type == LNET_MSG_PUT); - - CWARN("Dropping delayed PUT from %s portal %d match "LPU64 - " offset %d length %d: %s\n", - libcfs_id2str((lnet_process_id_t){ - .nid = msg->msg_hdr.src_nid, - .pid = msg->msg_hdr.src_pid}), - msg->msg_hdr.msg.put.ptl_index, - msg->msg_hdr.msg.put.match_bits, - msg->msg_hdr.msg.put.offset, - msg->msg_hdr.payload_length, - reason); - - /* NB I can't drop msg's ref on msg_rxpeer until after I've - * called lnet_drop_message(), so I just hang onto msg as well - * until that's done */ - - lnet_drop_message(msg->msg_rxpeer->lp_ni, - msg->msg_private, msg->msg_len); - - LNET_LOCK(); - - lnet_peer_decref_locked(msg->msg_rxpeer); - msg->msg_rxpeer = NULL; - - lnet_msg_free(msg); - - LNET_UNLOCK(); -} - -int -LNetSetLazyPortal(int portal) -{ - lnet_portal_t *ptl = &the_lnet.ln_portals[portal]; - - if (portal < 0 || portal >= the_lnet.ln_nportals) - return -EINVAL; - - CDEBUG(D_NET, "Setting portal %d lazy\n", portal); - - LNET_LOCK(); - - ptl->ptl_options |= LNET_PTL_LAZY; - - LNET_UNLOCK(); - - return 0; -} - -int 
-LNetClearLazyPortal(int portal) -{ - struct list_head zombies; - lnet_portal_t *ptl = &the_lnet.ln_portals[portal]; - lnet_msg_t *msg; - - if (portal < 0 || portal >= the_lnet.ln_nportals) - return -EINVAL; - - LNET_LOCK(); - - if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) { - LNET_UNLOCK(); - return 0; - } - - CDEBUG(D_NET, "clearing portal %d lazy\n", portal); - - /* grab all the blocked messages atomically */ - list_add(&zombies, &ptl->ptl_msgq); - list_del_init(&ptl->ptl_msgq); - - ptl->ptl_msgq_version++; - ptl->ptl_options &= ~LNET_PTL_LAZY; - - LNET_UNLOCK(); - - while (!list_empty(&zombies)) { - msg = list_entry(zombies.next, lnet_msg_t, msg_list); - list_del(&msg->msg_list); - - lnet_drop_delayed_put(msg, "Clearing lazy portal attr"); - } - - return 0; -} - -static void -lnet_recv_put(lnet_libmd_t *md, lnet_msg_t *msg, int delayed, - unsigned int offset, unsigned int mlength) -{ - lnet_hdr_t *hdr = &msg->msg_hdr; - - LNET_LOCK(); - - the_lnet.ln_counters.recv_count++; - the_lnet.ln_counters.recv_length += mlength; - - LNET_UNLOCK(); - - if (mlength != 0) - lnet_setpayloadbuffer(msg); - - msg->msg_ev.type = LNET_EVENT_PUT; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.hdr_data = hdr->msg.put.hdr_data; - - /* Must I ACK? 
If so I'll grab the ack_wmd out of the header and put - * it back into the ACK during lnet_finalize() */ - msg->msg_ack = (!lnet_is_wire_handle_none(&hdr->msg.put.ack_wmd) && - (md->md_options & LNET_MD_ACK_DISABLE) == 0); - - lnet_ni_recv(msg->msg_rxpeer->lp_ni, - msg->msg_private, - msg, delayed, offset, mlength, - hdr->payload_length); -} - -/* called with LNET_LOCK held */ -void -lnet_match_blocked_msg(lnet_libmd_t *md) -{ - CFS_LIST_HEAD (drops); - CFS_LIST_HEAD (matches); - struct list_head *tmp; - struct list_head *entry; - lnet_msg_t *msg; - lnet_me_t *me = md->md_me; - lnet_portal_t *ptl = &the_lnet.ln_portals[me->me_portal]; - - LASSERT (me->me_portal < the_lnet.ln_nportals); - - if ((ptl->ptl_options & LNET_PTL_LAZY) == 0) { - LASSERT (list_empty(&ptl->ptl_msgq)); - return; - } - - LASSERT (md->md_refcount == 0); /* a brand new MD */ - - list_for_each_safe (entry, tmp, &ptl->ptl_msgq) { - int rc; - int index; - unsigned int mlength; - unsigned int offset; - lnet_hdr_t *hdr; - lnet_process_id_t src; - - msg = list_entry(entry, lnet_msg_t, msg_list); - - LASSERT (msg->msg_delayed); - - hdr = &msg->msg_hdr; - index = hdr->msg.put.ptl_index; - - src.nid = hdr->src_nid; - src.pid = hdr->src_pid; - - rc = lnet_try_match_md(index, LNET_MD_OP_PUT, src, - hdr->payload_length, - hdr->msg.put.offset, - hdr->msg.put.match_bits, - md, msg, &mlength, &offset); - - if (rc == LNET_MATCHMD_NONE) - continue; - - /* Hurrah! 
This _is_ a match */ - list_del(&msg->msg_list); - ptl->ptl_msgq_version++; - - if (rc == LNET_MATCHMD_OK) { - list_add_tail(&msg->msg_list, &matches); - - CDEBUG(D_NET, "Resuming delayed PUT from %s portal %d " - "match "LPU64" offset %d length %d.\n", - libcfs_id2str(src), - hdr->msg.put.ptl_index, - hdr->msg.put.match_bits, - hdr->msg.put.offset, - hdr->payload_length); - } else { - LASSERT (rc == LNET_MATCHMD_DROP); - - list_add_tail(&msg->msg_list, &drops); - } - - if (lnet_md_exhausted(md)) - break; - } - - LNET_UNLOCK(); - - list_for_each_safe (entry, tmp, &drops) { - msg = list_entry(entry, lnet_msg_t, msg_list); - - list_del(&msg->msg_list); - - lnet_drop_delayed_put(msg, "Bad match"); - } - - list_for_each_safe (entry, tmp, &matches) { - msg = list_entry(entry, lnet_msg_t, msg_list); - - list_del(&msg->msg_list); - - /* md won't disappear under me, since each msg - * holds a ref on it */ - lnet_recv_put(md, msg, 1, - msg->msg_ev.offset, - msg->msg_ev.mlength); - } - - LNET_LOCK(); -} - -static int -lnet_parse_put(lnet_ni_t *ni, lnet_msg_t *msg) -{ - int rc; - int index; - lnet_hdr_t *hdr = &msg->msg_hdr; - unsigned int rlength = hdr->payload_length; - unsigned int mlength = 0; - unsigned int offset = 0; - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_libmd_t *md; - - /* Convert put fields to host byte order */ - hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits); - hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index); - hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset); - - index = hdr->msg.put.ptl_index; - - LNET_LOCK(); - - rc = lnet_match_md(index, LNET_MD_OP_PUT, src, - rlength, hdr->msg.put.offset, - hdr->msg.put.match_bits, msg, - &mlength, &offset, &md); - switch (rc) { - default: - LBUG(); - - case LNET_MATCHMD_OK: - LNET_UNLOCK(); - lnet_recv_put(md, msg, 0, offset, mlength); - return 0; - - case LNET_MATCHMD_NONE: - rc = lnet_eager_recv_locked(msg); - if (rc == 0) { - 
list_add_tail(&msg->msg_list, - &the_lnet.ln_portals[index].ptl_msgq); - - the_lnet.ln_portals[index].ptl_msgq_version++; - - CDEBUG(D_NET, "Delaying PUT from %s portal %d match " - LPU64" offset %d length %d: no match \n", - libcfs_id2str(src), index, - hdr->msg.put.match_bits, - hdr->msg.put.offset, rlength); - - LNET_UNLOCK(); - return 0; - } - /* fall through */ - - case LNET_MATCHMD_DROP: - CWARN("Dropping PUT from %s portal %d match "LPU64 - " offset %d length %d: %d\n", - libcfs_id2str(src), index, - hdr->msg.put.match_bits, - hdr->msg.put.offset, rlength, rc); - LNET_UNLOCK(); - - return ENOENT; /* +ve: OK but no match */ - - } -} - -static int -lnet_parse_get(lnet_ni_t *ni, lnet_msg_t *msg, int rdma_get) -{ - lnet_hdr_t *hdr = &msg->msg_hdr; - unsigned int mlength = 0; - unsigned int offset = 0; - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_handle_wire_t reply_wmd; - lnet_libmd_t *md; - int rc; - - /* Convert get fields to host byte order */ - hdr->msg.get.match_bits = le64_to_cpu(hdr->msg.get.match_bits); - hdr->msg.get.ptl_index = le32_to_cpu(hdr->msg.get.ptl_index); - hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length); - hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset); - - LNET_LOCK(); - - rc = lnet_match_md(hdr->msg.get.ptl_index, LNET_MD_OP_GET, src, - hdr->msg.get.sink_length, hdr->msg.get.src_offset, - hdr->msg.get.match_bits, msg, - &mlength, &offset, &md); - if (rc == LNET_MATCHMD_DROP) { - CWARN("Dropping GET from %s portal %d match "LPU64 - " offset %d length %d\n", - libcfs_id2str(src), - hdr->msg.get.ptl_index, - hdr->msg.get.match_bits, - hdr->msg.get.src_offset, - hdr->msg.get.sink_length); - LNET_UNLOCK(); - return ENOENT; /* +ve: OK but no match */ - } - - LASSERT (rc == LNET_MATCHMD_OK); - - the_lnet.ln_counters.send_count++; - the_lnet.ln_counters.send_length += mlength; - - LNET_UNLOCK(); - - reply_wmd = hdr->msg.get.return_wmd; - - lnet_prep_send(msg, 
LNET_MSG_REPLY, src, offset, mlength); - - msg->msg_hdr.msg.reply.dst_wmd = reply_wmd; - - msg->msg_ev.type = LNET_EVENT_GET; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.hdr_data = 0; - - if (rdma_get) { - /* The LND completes the REPLY from her recv procedure */ - lnet_ni_recv(ni, msg->msg_private, msg, 0, - msg->msg_offset, msg->msg_len, msg->msg_len); - return 0; - } - - lnet_ni_recv(ni, msg->msg_private, NULL, 0, 0, 0, 0); - msg->msg_receiving = 0; - - rc = lnet_send(ni->ni_nid, msg); - if (rc < 0) { - /* didn't get as far as lnet_ni_send() */ - CERROR("%s: Unable to send REPLY for GET from %s: %d\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), rc); - - lnet_finalize(ni, msg, rc); - } - - return 0; -} - -static int -lnet_parse_reply(lnet_ni_t *ni, lnet_msg_t *msg) -{ - void *private = msg->msg_private; - lnet_hdr_t *hdr = &msg->msg_hdr; - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_libmd_t *md; - int rlength; - int mlength; - - LNET_LOCK(); - - /* NB handles only looked up by creator (no flips) */ - md = lnet_wire_handle2md(&hdr->msg.reply.dst_wmd); - if (md == NULL || md->md_threshold == 0) { - CWARN("%s: Dropping REPLY from %s for %s " - "MD "LPX64"."LPX64"\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - (md == NULL) ? 
"invalid" : "inactive", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie); - - LNET_UNLOCK(); - return ENOENT; /* +ve: OK but no match */ - } - - LASSERT (md->md_offset == 0); - - rlength = hdr->payload_length; - mlength = MIN(rlength, md->md_length); - - if (mlength < rlength && - (md->md_options & LNET_MD_TRUNCATE) == 0) { - CERROR ("%s: Dropping REPLY from %s length %d " - "for MD "LPX64" would overflow (%d)\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - rlength, hdr->msg.reply.dst_wmd.wh_object_cookie, - mlength); - LNET_UNLOCK(); - return ENOENT; /* +ve: OK but no match */ - } - - CDEBUG(D_NET, "%s: Reply from %s of length %d/%d into md "LPX64"\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - mlength, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie); - - lnet_commit_md(md, msg); - - if (mlength != 0) - lnet_setpayloadbuffer(msg); - - msg->msg_ev.type = LNET_EVENT_REPLY; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.initiator = src; - msg->msg_ev.rlength = rlength; - msg->msg_ev.mlength = mlength; - msg->msg_ev.offset = 0; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - the_lnet.ln_counters.recv_count++; - the_lnet.ln_counters.recv_length += mlength; - - LNET_UNLOCK(); - - lnet_ni_recv(ni, private, msg, 0, 0, mlength, rlength); - return 0; -} - -static int -lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg) -{ - lnet_hdr_t *hdr = &msg->msg_hdr; - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_libmd_t *md; - - /* Convert ack fields to host byte order */ - hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits); - hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength); - - LNET_LOCK(); - - /* NB handles only looked up by creator (no flips) */ - md = lnet_wire_handle2md(&hdr->msg.ack.dst_wmd); - if (md == NULL || md->md_threshold == 0) { -#if 0 - /* Don't moan; 
this is expected */ - CERROR ("%s: Dropping ACK from %s to %s MD "LPX64"."LPX64"\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - (md == NULL) ? "invalid" : "inactive", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie); -#endif - LNET_UNLOCK(); - return ENOENT; /* +ve! */ - } - - CDEBUG(D_NET, "%s: ACK from %s into md "LPX64"\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(src), - hdr->msg.ack.dst_wmd.wh_object_cookie); - - lnet_commit_md(md, msg); - - msg->msg_ev.type = LNET_EVENT_ACK; - msg->msg_ev.target.pid = hdr->dest_pid; - msg->msg_ev.target.nid = hdr->dest_nid; - msg->msg_ev.initiator = src; - msg->msg_ev.mlength = hdr->msg.ack.mlength; - msg->msg_ev.match_bits = hdr->msg.ack.match_bits; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - the_lnet.ln_counters.recv_count++; - - LNET_UNLOCK(); - - lnet_ni_recv(ni, msg->msg_private, msg, 0, 0, 0, msg->msg_len); - return 0; -} - -char * -lnet_msgtyp2str (int type) -{ - switch (type) { - case LNET_MSG_ACK: - return ("ACK"); - case LNET_MSG_PUT: - return ("PUT"); - case LNET_MSG_GET: - return ("GET"); - case LNET_MSG_REPLY: - return ("REPLY"); - case LNET_MSG_HELLO: - return ("HELLO"); - default: - return ("<UNKNOWN>"); - } -} - -void -lnet_print_hdr(lnet_hdr_t * hdr) -{ - lnet_process_id_t src = {/* .nid = */ hdr->src_nid, - /* .pid = */ hdr->src_pid}; - lnet_process_id_t dst = {/* .nid = */ hdr->dest_nid, - /* .pid = */ hdr->dest_pid}; - char *type_str = lnet_msgtyp2str (hdr->type); - - CWARN("P3 Header at %p of type %s\n", hdr, type_str); - CWARN(" From %s\n", libcfs_id2str(src)); - CWARN(" To %s\n", libcfs_id2str(dst)); - - switch (hdr->type) { - default: - break; - - case LNET_MSG_PUT: - CWARN(" Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPU64"\n", - hdr->msg.put.ptl_index, - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - hdr->msg.put.match_bits); - CWARN(" Length %d, offset 
%d, hdr data "LPX64"\n", - hdr->payload_length, hdr->msg.put.offset, - hdr->msg.put.hdr_data); - break; - - case LNET_MSG_GET: - CWARN(" Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPU64"\n", hdr->msg.get.ptl_index, - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CWARN(" Length %d, src offset %d\n", - hdr->msg.get.sink_length, - hdr->msg.get.src_offset); - break; - - case LNET_MSG_ACK: - CWARN(" dst md "LPX64"."LPX64", " - "manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - hdr->msg.ack.mlength); - break; - - case LNET_MSG_REPLY: - CWARN(" dst md "LPX64"."LPX64", " - "length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - hdr->payload_length); - } - -} - - -int -lnet_parse(lnet_ni_t *ni, lnet_hdr_t *hdr, lnet_nid_t from_nid, - void *private, int rdma_req) -{ - int rc = 0; - int for_me; - lnet_msg_t *msg; - lnet_nid_t dest_nid; - lnet_nid_t src_nid; - __u32 payload_length; - __u32 type; - - LASSERT (!in_interrupt ()); - - type = le32_to_cpu(hdr->type); - src_nid = le64_to_cpu(hdr->src_nid); - dest_nid = le64_to_cpu(hdr->dest_nid); - payload_length = le32_to_cpu(hdr->payload_length); - - for_me = lnet_ptlcompat_matchnid(ni->ni_nid, dest_nid); - - switch (type) { - case LNET_MSG_ACK: - case LNET_MSG_GET: - if (payload_length > 0) { - CERROR("%s, src %s: bad %s payload %d (0 expected)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), payload_length); - return -EPROTO; - } - break; - - case LNET_MSG_PUT: - case LNET_MSG_REPLY: - if (payload_length > (for_me ? LNET_MAX_PAYLOAD : LNET_MTU)) { - CERROR("%s, src %s: bad %s payload %d " - "(%d max expected)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), - payload_length, - for_me ? 
LNET_MAX_PAYLOAD : LNET_MTU); - return -EPROTO; - } - break; - - default: - CERROR("%s, src %s: Bad message type 0x%x\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), type); - return -EPROTO; - } - - /* Regard a bad destination NID as a protocol error. Senders should - * know what they're doing; if they don't they're misconfigured, buggy - * or malicious so we chop them off at the knees :) */ - - if (!for_me) { - if (the_lnet.ln_ptlcompat > 0) { - /* portals compatibility is single-network */ - CERROR ("%s, src %s: Bad dest nid %s " - "(routing not supported)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (the_lnet.ln_ptlcompat == 0 && - LNET_NIDNET(dest_nid) == LNET_NIDNET(ni->ni_nid)) { - /* should have gone direct */ - CERROR ("%s, src %s: Bad dest nid %s " - "(should have been sent direct)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (the_lnet.ln_ptlcompat == 0 && - lnet_islocalnid(dest_nid)) { - /* dest is another local NI; sender should have used - * this node's NID on its own network */ - CERROR ("%s, src %s: Bad dest nid %s " - "(it's my nid but on a different network)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (rdma_req && type == LNET_MSG_GET) { - CERROR ("%s, src %s: Bad optimized GET for %s " - "(final destination must be me)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - return -EPROTO; - } - - if (!the_lnet.ln_routing) { - CERROR ("%s, src %s: Dropping message for %s " - "(routing not enabled)\n", - libcfs_nid2str(from_nid), - libcfs_nid2str(src_nid), - libcfs_nid2str(dest_nid)); - goto drop; - } - } - - /* Message looks OK; we're not going to return an error, so we MUST - * call back lnd_recv() come what may... 
*/ - - if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer (src_nid, 0)) /* shall we now? */ - { - CERROR("%s, src %s: Dropping %s to simulate failure\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type)); - goto drop; - } - - msg = lnet_msg_alloc(); - if (msg == NULL) { - CERROR("%s, src %s: Dropping %s (out of memory)\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid) - , lnet_msgtyp2str(type)); - goto drop; - } - - /* msg zeroed in lnet_msg_alloc; i.e. flags all clear, pointers NULL etc */ - - msg->msg_type = type; - msg->msg_private = private; - msg->msg_receiving = 1; - msg->msg_len = msg->msg_wanted = payload_length; - msg->msg_offset = 0; - msg->msg_hdr = *hdr; - - LNET_LOCK(); - rc = lnet_nid2peer_locked(&msg->msg_rxpeer, from_nid); - if (rc != 0) { - LNET_UNLOCK(); - CERROR("%s, src %s: Dropping %s " - "(error %d looking up sender)\n", - libcfs_nid2str(from_nid), libcfs_nid2str(src_nid), - lnet_msgtyp2str(type), rc); - goto free_drop; - } - LNET_UNLOCK(); - -#ifndef __KERNEL__ - LASSERT (for_me); -#else - if (!for_me) { - msg->msg_target.pid = le32_to_cpu(hdr->dest_pid); - msg->msg_target.nid = dest_nid; - msg->msg_routing = 1; - msg->msg_offset = 0; - - LNET_LOCK(); - if (msg->msg_rxpeer->lp_rtrcredits <= 0 || - lnet_msg2bufpool(msg)->rbp_credits <= 0) { - rc = lnet_eager_recv_locked(msg); - if (rc != 0) { - LNET_UNLOCK(); - goto free_drop; - } - } - - lnet_commit_routedmsg(msg); - rc = lnet_post_routed_recv_locked(msg, 0); - LNET_UNLOCK(); - - if (rc == 0) - lnet_ni_recv(ni, msg->msg_private, msg, 0, - 0, payload_length, payload_length); - return 0; - } -#endif - /* convert common msg->hdr fields to host byteorder */ - msg->msg_hdr.type = type; - msg->msg_hdr.src_nid = src_nid; - msg->msg_hdr.src_pid = le32_to_cpu(msg->msg_hdr.src_pid); - msg->msg_hdr.dest_nid = dest_nid; - msg->msg_hdr.dest_pid = le32_to_cpu(msg->msg_hdr.dest_pid); - msg->msg_hdr.payload_length = payload_length; - - 
msg->msg_ev.sender = from_nid; - - switch (type) { - case LNET_MSG_ACK: - rc = lnet_parse_ack(ni, msg); - break; - case LNET_MSG_PUT: - rc = lnet_parse_put(ni, msg); - break; - case LNET_MSG_GET: - rc = lnet_parse_get(ni, msg, rdma_req); - break; - case LNET_MSG_REPLY: - rc = lnet_parse_reply(ni, msg); - break; - default: - LASSERT(0); - goto free_drop; /* prevent an unused label if !kernel */ - } - - if (rc == 0) - return 0; - - LASSERT (rc == ENOENT); - - free_drop: - LASSERT (msg->msg_md == NULL); - LNET_LOCK(); - if (msg->msg_rxpeer != NULL) { - lnet_peer_decref_locked(msg->msg_rxpeer); - msg->msg_rxpeer = NULL; - } - lnet_msg_free(msg); /* expects LNET_LOCK held */ - LNET_UNLOCK(); - - drop: - lnet_drop_message(ni, private, payload_length); - return 0; -} - -int -LNetPut(lnet_nid_t self, lnet_handle_md_t mdh, lnet_ack_req_t ack, - lnet_process_id_t target, unsigned int portal, - __u64 match_bits, unsigned int offset, - __u64 hdr_data) -{ - lnet_msg_t *msg; - lnet_libmd_t *md; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer (target.nid, 1)) /* shall we now? 
*/ - { - CERROR("Dropping PUT to %s: simulated failure\n", - libcfs_id2str(target)); - return -EIO; - } - - msg = lnet_msg_alloc(); - if (msg == NULL) { - CERROR("Dropping PUT to %s: ENOMEM on lnet_msg_t\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - LNET_LOCK(); - - md = lnet_handle2md(&mdh); - if (md == NULL || md->md_threshold == 0) { - lnet_msg_free(msg); - LNET_UNLOCK(); - - CERROR("Dropping PUT to %s: MD invalid\n", - libcfs_id2str(target)); - return -ENOENT; - } - - CDEBUG(D_NET, "LNetPut -> %s\n", libcfs_id2str(target)); - - lnet_commit_md(md, msg); - - lnet_prep_send(msg, LNET_MSG_PUT, target, 0, md->md_length); - - msg->msg_hdr.msg.put.match_bits = cpu_to_le64(match_bits); - msg->msg_hdr.msg.put.ptl_index = cpu_to_le32(portal); - msg->msg_hdr.msg.put.offset = cpu_to_le32(offset); - msg->msg_hdr.msg.put.hdr_data = hdr_data; - - /* NB handles only looked up by creator (no flips) */ - if (ack == LNET_ACK_REQ) { - msg->msg_hdr.msg.put.ack_wmd.wh_interface_cookie = - the_lnet.ln_interface_cookie; - msg->msg_hdr.msg.put.ack_wmd.wh_object_cookie = - md->md_lh.lh_cookie; - } else { - msg->msg_hdr.msg.put.ack_wmd = LNET_WIRE_HANDLE_NONE; - } - - msg->msg_ev.type = LNET_EVENT_SEND; - msg->msg_ev.initiator.nid = LNET_NID_ANY; - msg->msg_ev.initiator.pid = the_lnet.ln_pid; - msg->msg_ev.target = target; - msg->msg_ev.sender = LNET_NID_ANY; - msg->msg_ev.pt_index = portal; - msg->msg_ev.match_bits = match_bits; - msg->msg_ev.rlength = md->md_length; - msg->msg_ev.mlength = md->md_length; - msg->msg_ev.offset = offset; - msg->msg_ev.hdr_data = hdr_data; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - the_lnet.ln_counters.send_count++; - the_lnet.ln_counters.send_length += md->md_length; - - LNET_UNLOCK(); - - rc = lnet_send(self, msg); - if (rc != 0) { - CERROR("Error sending PUT to %s: %d\n", - libcfs_id2str(target), rc); - lnet_finalize (NULL, msg, rc); - } - - /* completion will be signalled by an event */ - 
return 0; -} - -lnet_msg_t * -lnet_create_reply_msg (lnet_ni_t *ni, lnet_msg_t *getmsg) -{ - /* The LND can DMA direct to the GET md (i.e. no REPLY msg). This - * returns a msg for the LND to pass to lnet_finalize() when the sink - * data has been received. - * - * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when - * lnet_finalize() is called on it, so the LND must call this first */ - - lnet_msg_t *msg = lnet_msg_alloc(); - lnet_libmd_t *getmd = getmsg->msg_md; - lnet_process_id_t peer_id = getmsg->msg_target; - - LASSERT (!getmsg->msg_target_is_router); - LASSERT (!getmsg->msg_routing); - - LNET_LOCK(); - - LASSERT (getmd->md_refcount > 0); - - if (msg == NULL) { - CERROR ("%s: Dropping REPLY from %s: can't allocate msg\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id)); - goto drop; - } - - if (getmd->md_threshold == 0) { - CERROR ("%s: Dropping REPLY from %s for inactive MD %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), - getmd); - goto drop_msg; - } - - LASSERT (getmd->md_offset == 0); - - CDEBUG(D_NET, "%s: Reply from %s md %p\n", - libcfs_nid2str(ni->ni_nid), libcfs_id2str(peer_id), getmd); - - lnet_commit_md (getmd, msg); - - msg->msg_type = LNET_MSG_GET; /* flag this msg as an "optimized" GET */ - - msg->msg_ev.type = LNET_EVENT_REPLY; - msg->msg_ev.initiator = peer_id; - msg->msg_ev.sender = peer_id.nid; /* optimized GETs can't be routed */ - msg->msg_ev.rlength = msg->msg_ev.mlength = getmd->md_length; - msg->msg_ev.offset = 0; - - lnet_md_deconstruct(getmd, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, getmd); - - the_lnet.ln_counters.recv_count++; - the_lnet.ln_counters.recv_length += getmd->md_length; - - LNET_UNLOCK(); - - return msg; - - drop_msg: - lnet_msg_free(msg); - drop: - the_lnet.ln_counters.drop_count++; - the_lnet.ln_counters.drop_length += getmd->md_length; - - LNET_UNLOCK (); - - return NULL; -} - -void -lnet_set_reply_msg_len(lnet_ni_t *ni, lnet_msg_t *reply, unsigned int len) -{ - /* 
Set the REPLY length, now the RDMA that elides the REPLY message has - * completed and I know it. */ - LASSERT (reply != NULL); - LASSERT (reply->msg_type == LNET_MSG_GET); - LASSERT (reply->msg_ev.type == LNET_EVENT_REPLY); - - /* NB I trusted my peer to RDMA. If she tells me she's written beyond - * the end of my buffer, I might as well be dead. */ - LASSERT (len <= reply->msg_ev.mlength); - - reply->msg_ev.mlength = len; -} - -int -LNetGet(lnet_nid_t self, lnet_handle_md_t mdh, - lnet_process_id_t target, unsigned int portal, - __u64 match_bits, unsigned int offset) -{ - lnet_msg_t *msg; - lnet_libmd_t *md; - int rc; - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - if (!list_empty (&the_lnet.ln_test_peers) && /* normally we don't */ - fail_peer (target.nid, 1)) /* shall we now? */ - { - CERROR("Dropping GET to %s: simulated failure\n", - libcfs_id2str(target)); - return -EIO; - } - - msg = lnet_msg_alloc(); - if (msg == NULL) { - CERROR("Dropping GET to %s: ENOMEM on lnet_msg_t\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - LNET_LOCK(); - - md = lnet_handle2md(&mdh); - if (md == NULL || md->md_threshold == 0) { - lnet_msg_free(msg); - LNET_UNLOCK(); - - CERROR("Dropping GET to %s: MD invalid\n", - libcfs_id2str(target)); - return -ENOENT; - } - - CDEBUG(D_NET, "LNetGet -> %s\n", libcfs_id2str(target)); - - lnet_commit_md(md, msg); - - lnet_prep_send(msg, LNET_MSG_GET, target, 0, 0); - - msg->msg_hdr.msg.get.match_bits = cpu_to_le64(match_bits); - msg->msg_hdr.msg.get.ptl_index = cpu_to_le32(portal); - msg->msg_hdr.msg.get.src_offset = cpu_to_le32(offset); - msg->msg_hdr.msg.get.sink_length = cpu_to_le32(md->md_length); - - /* NB handles only looked up by creator (no flips) */ - msg->msg_hdr.msg.get.return_wmd.wh_interface_cookie = - the_lnet.ln_interface_cookie; - msg->msg_hdr.msg.get.return_wmd.wh_object_cookie = - md->md_lh.lh_cookie; - - msg->msg_ev.type = LNET_EVENT_SEND; - msg->msg_ev.initiator.nid = LNET_NID_ANY; - 
msg->msg_ev.initiator.pid = the_lnet.ln_pid; - msg->msg_ev.target = target; - msg->msg_ev.sender = LNET_NID_ANY; - msg->msg_ev.pt_index = portal; - msg->msg_ev.match_bits = match_bits; - msg->msg_ev.rlength = md->md_length; - msg->msg_ev.mlength = md->md_length; - msg->msg_ev.offset = offset; - msg->msg_ev.hdr_data = 0; - - lnet_md_deconstruct(md, &msg->msg_ev.md); - lnet_md2handle(&msg->msg_ev.md_handle, md); - - the_lnet.ln_counters.send_count++; - - LNET_UNLOCK(); - - rc = lnet_send(self, msg); - if (rc < 0) { - CERROR("error sending GET to %s: %d\n", - libcfs_id2str(target), rc); - lnet_finalize (NULL, msg, rc); - } - - /* completion will be signalled by an event */ - return 0; -} - -int -LNetDist (lnet_nid_t dstnid, lnet_nid_t *srcnidp, int *orderp) -{ - struct list_head *e; - lnet_ni_t *ni; - lnet_route_t *route; - lnet_remotenet_t *rnet; - __u32 dstnet = LNET_NIDNET(dstnid); - int hops; - int order = 2; - - /* if !local_nid_dist_zero, I don't return a distance of 0 ever - * (when lustre sees a distance of 0, it substitutes 0@lo), so I - * keep order 0 free for 0@lo and order 1 free for a local NID - * match */ - - LASSERT (the_lnet.ln_init); - LASSERT (the_lnet.ln_refcount > 0); - - LNET_LOCK(); - - list_for_each (e, &the_lnet.ln_nis) { - ni = list_entry(e, lnet_ni_t, ni_list); - - if (ni->ni_nid == dstnid || - (the_lnet.ln_ptlcompat > 0 && - LNET_NIDNET(dstnid) == 0 && - LNET_NIDADDR(dstnid) == LNET_NIDADDR(ni->ni_nid) && - LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) != LOLND)) { - if (srcnidp != NULL) - *srcnidp = dstnid; - if (orderp != NULL) { - if (LNET_NETTYP(LNET_NIDNET(dstnid)) == LOLND) - *orderp = 0; - else - *orderp = 1; - } - LNET_UNLOCK(); - - return local_nid_dist_zero ? 
0 : 1; - } - - if (LNET_NIDNET(ni->ni_nid) == dstnet || - (the_lnet.ln_ptlcompat > 0 && - dstnet == 0 && - LNET_NETTYP(LNET_NIDNET(ni->ni_nid)) != LOLND)) { - if (srcnidp != NULL) - *srcnidp = ni->ni_nid; - if (orderp != NULL) - *orderp = order; - LNET_UNLOCK(); - return 1; - } - - order++; - } - - list_for_each (e, &the_lnet.ln_remote_nets) { - rnet = list_entry(e, lnet_remotenet_t, lrn_list); - - if (rnet->lrn_net == dstnet) { - LASSERT (!list_empty(&rnet->lrn_routes)); - route = list_entry(rnet->lrn_routes.next, - lnet_route_t, lr_list); - hops = rnet->lrn_hops; - if (srcnidp != NULL) - *srcnidp = route->lr_gateway->lp_ni->ni_nid; - if (orderp != NULL) - *orderp = order; - LNET_UNLOCK(); - return hops + 1; - } - order++; - } - - LNET_UNLOCK(); - return -EHOSTUNREACH; -} - -int -LNetSetAsync(lnet_process_id_t id, int nasync) -{ -#ifdef __KERNEL__ - return 0; -#else - lnet_ni_t *ni; - lnet_remotenet_t *rnet; - struct list_head *tmp; - lnet_route_t *route; - lnet_nid_t *nids; - int nnids; - int maxnids = 256; - int rc = 0; - int rc2; - - /* Target on a local network? 
*/ - - ni = lnet_net2ni(LNET_NIDNET(id.nid)); - if (ni != NULL) { - if (ni->ni_lnd->lnd_setasync != NULL) - rc = (ni->ni_lnd->lnd_setasync)(ni, id, nasync); - lnet_ni_decref(ni); - return rc; - } - - /* Target on a remote network: apply to routers */ - again: - LIBCFS_ALLOC(nids, maxnids * sizeof(*nids)); - if (nids == NULL) - return -ENOMEM; - nnids = 0; - - /* Snapshot all the router NIDs */ - LNET_LOCK(); - rnet = lnet_find_net_locked(LNET_NIDNET(id.nid)); - if (rnet != NULL) { - list_for_each(tmp, &rnet->lrn_routes) { - if (nnids == maxnids) { - LNET_UNLOCK(); - LIBCFS_FREE(nids, maxnids * sizeof(*nids)); - maxnids *= 2; - goto again; - } - - route = list_entry(tmp, lnet_route_t, lr_list); - nids[nnids++] = route->lr_gateway->lp_nid; - } - } - LNET_UNLOCK(); - - /* set async on all the routers */ - while (nnids-- > 0) { - id.pid = LUSTRE_SRV_LNET_PID; - id.nid = nids[nnids]; - - ni = lnet_net2ni(LNET_NIDNET(id.nid)); - if (ni == NULL) - continue; - - if (ni->ni_lnd->lnd_setasync != NULL) { - rc2 = (ni->ni_lnd->lnd_setasync)(ni, id, nasync); - if (rc2 != 0) - rc = rc2; - } - lnet_ni_decref(ni); - } - - LIBCFS_FREE(nids, maxnids * sizeof(*nids)); - return rc; -#endif -} - diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c deleted file mode 100644 index d29aa1e9ab621d769c22f86101fd2afc85fa8899..0000000000000000000000000000000000000000 --- a/lnet/lnet/lib-msg.c +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-msg.c - * Message decoding, parsing and finalizing routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -void -lnet_enq_event_locked (lnet_eq_t *eq, lnet_event_t *ev) -{ - lnet_event_t *eq_slot; - - /* Allocate the next queue slot */ - ev->sequence = eq->eq_enq_seq++; - - /* size must be a power of 2 to handle sequence # overflow */ - LASSERT (eq->eq_size != 0 && - eq->eq_size == LOWEST_BIT_SET (eq->eq_size)); - eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1)); - - /* There is no race since both event consumers and event producers - * take the LNET_LOCK, so we don't screw around with memory - * barriers, setting the sequence number last or wierd structure - * layout assertions. 
*/ - *eq_slot = *ev; - - /* Call the callback handler (if any) */ - if (eq->eq_callback != NULL) - eq->eq_callback (eq_slot); - -#ifdef __KERNEL__ - /* Wake anyone waiting in LNetEQPoll() */ - if (cfs_waitq_active(&the_lnet.ln_waitq)) - cfs_waitq_broadcast(&the_lnet.ln_waitq); -#else -# ifndef HAVE_LIBPTHREAD - /* LNetEQPoll() calls into _the_ LND to wait for action */ -# else - /* Wake anyone waiting in LNetEQPoll() */ - pthread_cond_broadcast(&the_lnet.ln_cond); -# endif -#endif -} - -void -lnet_complete_msg_locked(lnet_msg_t *msg) -{ - lnet_handle_wire_t ack_wmd; - int rc; - int status = msg->msg_ev.status; - - LASSERT (msg->msg_onactivelist); - - if (status == 0 && msg->msg_ack) { - /* Only send an ACK if the PUT completed successfully */ - - lnet_return_credits_locked(msg); - - msg->msg_ack = 0; - LNET_UNLOCK(); - - LASSERT(msg->msg_ev.type == LNET_EVENT_PUT); - LASSERT(!msg->msg_routing); - - ack_wmd = msg->msg_hdr.msg.put.ack_wmd; - - lnet_prep_send(msg, LNET_MSG_ACK, msg->msg_ev.initiator, 0, 0); - - msg->msg_hdr.msg.ack.dst_wmd = ack_wmd; - msg->msg_hdr.msg.ack.match_bits = msg->msg_ev.match_bits; - msg->msg_hdr.msg.ack.mlength = cpu_to_le32(msg->msg_ev.mlength); - - rc = lnet_send(msg->msg_ev.target.nid, msg); - - LNET_LOCK(); - - if (rc == 0) - return; - } else if (status == 0 && /* OK so far */ - (msg->msg_routing && !msg->msg_sending)) { /* not forwarded */ - - LASSERT (!msg->msg_receiving); /* called back recv already */ - - LNET_UNLOCK(); - - rc = lnet_send(LNET_NID_ANY, msg); - - LNET_LOCK(); - - if (rc == 0) - return; - } - - lnet_return_credits_locked(msg); - - LASSERT (msg->msg_onactivelist); - msg->msg_onactivelist = 0; - list_del (&msg->msg_activelist); - the_lnet.ln_counters.msgs_alloc--; - lnet_msg_free(msg); -} - - -void -lnet_finalize (lnet_ni_t *ni, lnet_msg_t *msg, int status) -{ -#ifdef __KERNEL__ - int i; - int my_slot; -#endif - lnet_libmd_t *md; - - LASSERT (!in_interrupt ()); - - if (msg == NULL) - return; -#if 0 - CDEBUG(D_WARNING, 
"%s msg->%s Flags:%s%s%s%s%s%s%s%s%s%s%s txp %s rxp %s\n", - lnet_msgtyp2str(msg->msg_type), libcfs_id2str(msg->msg_target), - msg->msg_target_is_router ? "t" : "", - msg->msg_routing ? "X" : "", - msg->msg_ack ? "A" : "", - msg->msg_sending ? "S" : "", - msg->msg_receiving ? "R" : "", - msg->msg_delayed ? "d" : "", - msg->msg_txcredit ? "C" : "", - msg->msg_peertxcredit ? "c" : "", - msg->msg_rtrcredit ? "F" : "", - msg->msg_peerrtrcredit ? "f" : "", - msg->msg_onactivelist ? "!" : "", - msg->msg_txpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_txpeer->lp_nid), - msg->msg_rxpeer == NULL ? "<none>" : libcfs_nid2str(msg->msg_rxpeer->lp_nid)); -#endif - LNET_LOCK(); - - LASSERT (msg->msg_onactivelist); - - msg->msg_ev.status = status; - - md = msg->msg_md; - if (md != NULL) { - int unlink; - - /* Now it's safe to drop my caller's ref */ - md->md_refcount--; - LASSERT (md->md_refcount >= 0); - - unlink = lnet_md_unlinkable(md); - - msg->msg_ev.unlinked = unlink; - - if (md->md_eq != NULL) - lnet_enq_event_locked(md->md_eq, &msg->msg_ev); - - if (unlink) - lnet_md_unlink(md); - - msg->msg_md = NULL; - } - - list_add_tail (&msg->msg_list, &the_lnet.ln_finalizeq); - - /* Recursion breaker. 
Don't complete the message here if I am (or - * enough other threads are) already completing messages */ - -#ifdef __KERNEL__ - my_slot = -1; - for (i = 0; i < the_lnet.ln_nfinalizers; i++) { - if (the_lnet.ln_finalizers[i] == cfs_current()) - goto out; - if (my_slot < 0 && the_lnet.ln_finalizers[i] == NULL) - my_slot = i; - } - if (my_slot < 0) - goto out; - - the_lnet.ln_finalizers[my_slot] = cfs_current(); -#else - if (the_lnet.ln_finalizing) - goto out; -#endif - - while (!list_empty(&the_lnet.ln_finalizeq)) { - msg = list_entry(the_lnet.ln_finalizeq.next, - lnet_msg_t, msg_list); - - list_del(&msg->msg_list); - - /* NB drops and regains the lnet lock if it actually does - * anything, so my finalizing friends can chomp along too */ - lnet_complete_msg_locked(msg); - } - -#ifdef __KERNEL__ - the_lnet.ln_finalizers[my_slot] = NULL; -#else - the_lnet.ln_finalizing = 0; -#endif - - out: - LNET_UNLOCK(); -} - diff --git a/lnet/lnet/lo.c b/lnet/lnet/lo.c deleted file mode 100644 index e123b3d8b6914eb5366deb6c3ea99c8a2374617c..0000000000000000000000000000000000000000 --- a/lnet/lnet/lo.c +++ /dev/null @@ -1,112 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2004 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -int -lolnd_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - LASSERT (!lntmsg->msg_routing); - LASSERT (!lntmsg->msg_target_is_router); - - return lnet_parse(ni, &lntmsg->msg_hdr, ni->ni_nid, lntmsg, 0); -} - -int -lolnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - lnet_msg_t *sendmsg = private; - - if (lntmsg != NULL) { /* not discarding */ - if (sendmsg->msg_iov != NULL) { - if (iov != NULL) - lnet_copy_iov2iov(niov, iov, offset, - sendmsg->msg_niov, - sendmsg->msg_iov, - sendmsg->msg_offset, mlen); - else - lnet_copy_iov2kiov(niov, kiov, offset, - sendmsg->msg_niov, - sendmsg->msg_iov, - sendmsg->msg_offset, mlen); - } else { - if (iov != NULL) - lnet_copy_kiov2iov(niov, iov, offset, - sendmsg->msg_niov, - sendmsg->msg_kiov, - sendmsg->msg_offset, mlen); - else - lnet_copy_kiov2kiov(niov, kiov, offset, - sendmsg->msg_niov, - sendmsg->msg_kiov, - sendmsg->msg_offset, mlen); - } - - lnet_finalize(ni, lntmsg, 0); - } - - lnet_finalize(ni, sendmsg, 0); - return 0; -} - -static int lolnd_instanced; - -void -lolnd_shutdown(lnet_ni_t *ni) -{ - CDEBUG (D_NET, "shutdown\n"); - LASSERT (lolnd_instanced); - - lolnd_instanced = 0; -} - -int -lolnd_startup (lnet_ni_t *ni) -{ - LASSERT (ni->ni_lnd == &the_lolnd); - LASSERT (!lolnd_instanced); - lolnd_instanced = 1; - - return (0); -} - -lnd_t the_lolnd = { - /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list}, - /* .lnd_refcount = */ 0, - /* .lnd_type = */ LOLND, - /* .lnd_startup = */ lolnd_startup, - /* .lnd_shutdown = */ lolnd_shutdown, - /* .lnt_ctl = */ NULL, - /* .lnd_send = */ lolnd_send, - /* .lnd_recv = */ lolnd_recv, - /* .lnd_eager_recv = */ NULL, - /* .lnd_notify = */ NULL, -#ifdef __KERNEL__ - /* .lnd_accept = */ NULL -#else - /* .lnd_wait = */ NULL -#endif -}; - diff --git 
a/lnet/lnet/module.c b/lnet/lnet/module.c deleted file mode 100644 index d612fafc68d28e9dba9273b0df03562eada297d3..0000000000000000000000000000000000000000 --- a/lnet/lnet/module.c +++ /dev/null @@ -1,184 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -static int config_on_load = 0; -CFS_MODULE_PARM(config_on_load, "i", int, 0444, - "configure network at module load"); - -static struct semaphore lnet_config_mutex; - -int -lnet_configure (void *arg) -{ - /* 'arg' only there so I can be passed to cfs_kernel_thread() */ - int rc = 0; - - LNET_MUTEX_DOWN(&lnet_config_mutex); - - if (!the_lnet.ln_niinit_self) { - rc = LNetNIInit(LUSTRE_SRV_LNET_PID); - if (rc >= 0) { - the_lnet.ln_niinit_self = 1; - rc = 0; - } - } - - LNET_MUTEX_UP(&lnet_config_mutex); - return rc; -} - -int -lnet_unconfigure (void) -{ - int refcount; - - LNET_MUTEX_DOWN(&lnet_config_mutex); - - if (the_lnet.ln_niinit_self) { - the_lnet.ln_niinit_self = 0; - LNetNIFini(); - } - - LNET_MUTEX_DOWN(&the_lnet.ln_api_mutex); - refcount = the_lnet.ln_refcount; - LNET_MUTEX_UP(&the_lnet.ln_api_mutex); - - LNET_MUTEX_UP(&lnet_config_mutex); - return (refcount == 0) ? 
0 : -EBUSY; -} - -int -lnet_ioctl(unsigned int cmd, struct libcfs_ioctl_data *data) -{ - int rc; - - switch (cmd) { - case IOC_LIBCFS_CONFIGURE: - return lnet_configure(NULL); - - case IOC_LIBCFS_UNCONFIGURE: - return lnet_unconfigure(); - - default: - /* Passing LNET_PID_ANY only gives me a ref if the net is up - * already; I'll need it to ensure the net can't go down while - * I'm called into it */ - rc = LNetNIInit(LNET_PID_ANY); - if (rc >= 0) { - rc = LNetCtl(cmd, data); - LNetNIFini(); - } - return rc; - } -} - -DECLARE_IOCTL_HANDLER(lnet_ioctl_handler, lnet_ioctl); - -int -init_lnet(void) -{ - int rc; - ENTRY; - - init_mutex(&lnet_config_mutex); - - rc = LNetInit(); - if (rc != 0) { - CERROR("LNetInit: error %d\n", rc); - RETURN(rc); - } - - rc = libcfs_register_ioctl(&lnet_ioctl_handler); - LASSERT (rc == 0); - - if (config_on_load) { - /* Have to schedule a separate thread to avoid deadlocking - * in modload */ - (void) cfs_kernel_thread(lnet_configure, NULL, 0); - } - - RETURN(0); -} - -void -fini_lnet(void) -{ - int rc; - - rc = libcfs_deregister_ioctl(&lnet_ioctl_handler); - LASSERT (rc == 0); - - LNetFini(); -} - -EXPORT_SYMBOL(lnet_register_lnd); -EXPORT_SYMBOL(lnet_unregister_lnd); - -EXPORT_SYMBOL(LNetMEAttach); -EXPORT_SYMBOL(LNetMEInsert); -EXPORT_SYMBOL(LNetMEUnlink); -EXPORT_SYMBOL(LNetEQAlloc); -EXPORT_SYMBOL(LNetMDAttach); -EXPORT_SYMBOL(LNetMDUnlink); -EXPORT_SYMBOL(LNetNIInit); -EXPORT_SYMBOL(LNetNIFini); -EXPORT_SYMBOL(LNetInit); -EXPORT_SYMBOL(LNetFini); -EXPORT_SYMBOL(LNetSnprintHandle); -EXPORT_SYMBOL(LNetPut); -EXPORT_SYMBOL(LNetGet); -EXPORT_SYMBOL(LNetEQWait); -EXPORT_SYMBOL(LNetEQFree); -EXPORT_SYMBOL(LNetEQGet); -EXPORT_SYMBOL(LNetGetId); -EXPORT_SYMBOL(LNetMDBind); -EXPORT_SYMBOL(LNetDist); -EXPORT_SYMBOL(LNetSetAsync); -EXPORT_SYMBOL(LNetCtl); -EXPORT_SYMBOL(LNetSetLazyPortal); -EXPORT_SYMBOL(LNetClearLazyPortal); -EXPORT_SYMBOL(the_lnet); -EXPORT_SYMBOL(lnet_iov_nob); -EXPORT_SYMBOL(lnet_extract_iov); 
-EXPORT_SYMBOL(lnet_kiov_nob); -EXPORT_SYMBOL(lnet_extract_kiov); -EXPORT_SYMBOL(lnet_copy_iov2iov); -EXPORT_SYMBOL(lnet_copy_iov2kiov); -EXPORT_SYMBOL(lnet_copy_kiov2iov); -EXPORT_SYMBOL(lnet_copy_kiov2kiov); -EXPORT_SYMBOL(lnet_finalize); -EXPORT_SYMBOL(lnet_parse); -EXPORT_SYMBOL(lnet_create_reply_msg); -EXPORT_SYMBOL(lnet_set_reply_msg_len); -EXPORT_SYMBOL(lnet_msgtyp2str); -EXPORT_SYMBOL(lnet_net2ni_locked); - -MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>"); -MODULE_DESCRIPTION("Portals v3.1"); -MODULE_LICENSE("GPL"); - -cfs_module(lnet, "1.0.0", init_lnet, fini_lnet); diff --git a/lnet/lnet/peer.c b/lnet/lnet/peer.c deleted file mode 100644 index 6ac1d1e4fed13b931d4302dcd7397e0e7a64e49b..0000000000000000000000000000000000000000 --- a/lnet/lnet/peer.c +++ /dev/null @@ -1,244 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-move.c - * Data movement routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -#include <lnet/lib-lnet.h> - -int -lnet_create_peer_table(void) -{ - struct list_head *hash; - int i; - - LASSERT (the_lnet.ln_peer_hash == NULL); - LIBCFS_ALLOC(hash, LNET_PEER_HASHSIZE * sizeof(struct list_head)); - - if (hash == NULL) { - CERROR("Can't allocate peer hash table\n"); - return -ENOMEM; - } - - for (i = 0; i < LNET_PEER_HASHSIZE; i++) - CFS_INIT_LIST_HEAD(&hash[i]); - - the_lnet.ln_peer_hash = hash; - return 0; -} - -void -lnet_destroy_peer_table(void) -{ - int i; - - if (the_lnet.ln_peer_hash == NULL) - return; - - for (i = 0; i < LNET_PEER_HASHSIZE; i++) - LASSERT (list_empty(&the_lnet.ln_peer_hash[i])); - - LIBCFS_FREE(the_lnet.ln_peer_hash, - LNET_PEER_HASHSIZE * sizeof (struct list_head)); - the_lnet.ln_peer_hash = NULL; -} - -void -lnet_clear_peer_table(void) -{ - int i; - - LASSERT (the_lnet.ln_shutdown); /* i.e. no new peers */ - - for (i = 0; i < LNET_PEER_HASHSIZE; i++) { - struct list_head *peers = &the_lnet.ln_peer_hash[i]; - - LNET_LOCK(); - while (!list_empty(peers)) { - lnet_peer_t *lp = list_entry(peers->next, - lnet_peer_t, lp_hashlist); - - list_del(&lp->lp_hashlist); - lnet_peer_decref_locked(lp); /* lose hash table's ref */ - } - LNET_UNLOCK(); - } - - LNET_LOCK(); - for (i = 3; the_lnet.ln_npeers != 0;i++) { - LNET_UNLOCK(); - - if ((i & (i-1)) == 0) - CDEBUG(D_WARNING,"Waiting for %d peers\n", - the_lnet.ln_npeers); - cfs_pause(cfs_time_seconds(1)); - - LNET_LOCK(); - } - LNET_UNLOCK(); -} - -void -lnet_destroy_peer_locked (lnet_peer_t *lp) -{ - lnet_ni_decref_locked(lp->lp_ni); - LNET_UNLOCK(); - - LASSERT (lp->lp_refcount == 0); - LASSERT (lp->lp_rtr_refcount == 0); - LASSERT (list_empty(&lp->lp_txq)); - LASSERT (lp->lp_txqnob == 0); - - LIBCFS_FREE(lp, sizeof(*lp)); - - LNET_LOCK(); - - LASSERT(the_lnet.ln_npeers > 0); - the_lnet.ln_npeers--; -} - -lnet_peer_t * -lnet_find_peer_locked (lnet_nid_t nid) -{ - unsigned int idx = LNET_NIDADDR(nid) % LNET_PEER_HASHSIZE; - struct list_head 
*peers = &the_lnet.ln_peer_hash[idx]; - struct list_head *tmp; - lnet_peer_t *lp; - - if (the_lnet.ln_shutdown) - return NULL; - - list_for_each (tmp, peers) { - lp = list_entry(tmp, lnet_peer_t, lp_hashlist); - - if (lp->lp_nid == nid) { - lnet_peer_addref_locked(lp); - return lp; - } - } - - return NULL; -} - -int -lnet_nid2peer_locked(lnet_peer_t **lpp, lnet_nid_t nid) -{ - lnet_peer_t *lp; - lnet_peer_t *lp2; - - lp = lnet_find_peer_locked(nid); - if (lp != NULL) { - *lpp = lp; - return 0; - } - - LNET_UNLOCK(); - - LIBCFS_ALLOC(lp, sizeof(*lp)); - if (lp == NULL) { - *lpp = NULL; - LNET_LOCK(); - return -ENOMEM; - } - - memset(lp, 0, sizeof(*lp)); /* zero counters etc */ - - CFS_INIT_LIST_HEAD(&lp->lp_txq); - CFS_INIT_LIST_HEAD(&lp->lp_rtrq); - - lp->lp_alive = !lnet_peers_start_down(); /* 1 bit!! */ - lp->lp_notify = 0; - lp->lp_notifylnd = 0; - lp->lp_notifying = 0; - lp->lp_alive_count = 0; - lp->lp_timestamp = 0; - lp->lp_ping_timestamp = 0; - lp->lp_nid = nid; - lp->lp_refcount = 2; /* 1 for caller; 1 for hash */ - lp->lp_rtr_refcount = 0; - - LNET_LOCK(); - - lp2 = lnet_find_peer_locked(nid); - if (lp2 != NULL) { - LNET_UNLOCK(); - LIBCFS_FREE(lp, sizeof(*lp)); - LNET_LOCK(); - - *lpp = lp2; - return 0; - } - - lp->lp_ni = lnet_net2ni_locked(LNET_NIDNET(nid)); - if (lp->lp_ni == NULL) { - LNET_UNLOCK(); - LIBCFS_FREE(lp, sizeof(*lp)); - LNET_LOCK(); - - *lpp = NULL; - return the_lnet.ln_shutdown ? 
-ESHUTDOWN : -EHOSTUNREACH; - } - - lp->lp_txcredits = - lp->lp_mintxcredits = lp->lp_ni->ni_peertxcredits; - - /* As a first approximation; allow this peer the same number of router - * buffers as it is allowed outstanding sends */ - lp->lp_rtrcredits = lp->lp_minrtrcredits = lp->lp_txcredits; - - LASSERT (!the_lnet.ln_shutdown); - /* can't add peers after shutdown starts */ - - list_add_tail(&lp->lp_hashlist, lnet_nid2peerhash(nid)); - the_lnet.ln_npeers++; - the_lnet.ln_peertable_version++; - *lpp = lp; - return 0; -} - -void -lnet_debug_peer(lnet_nid_t nid) -{ - int rc; - lnet_peer_t *lp; - - LNET_LOCK(); - - rc = lnet_nid2peer_locked(&lp, nid); - if (rc != 0) { - LNET_UNLOCK(); - CDEBUG(D_WARNING, "No peer %s\n", libcfs_nid2str(nid)); - return; - } - - CDEBUG(D_WARNING, "%-24s %4d %5s %5d %5d %5d %5d %5d %ld\n", - libcfs_nid2str(lp->lp_nid), lp->lp_refcount, - lp->lp_alive ? "up" : "down", - lp->lp_ni->ni_peertxcredits, - lp->lp_rtrcredits, lp->lp_minrtrcredits, - lp->lp_txcredits, lp->lp_mintxcredits, lp->lp_txqnob); - - lnet_peer_decref_locked(lp); - - LNET_UNLOCK(); -} diff --git a/lnet/lnet/portals.xcode/project.pbxproj b/lnet/lnet/portals.xcode/project.pbxproj deleted file mode 100644 index 1dc0146c43a91ce8a117c6790b6fbf3d44da0a60..0000000000000000000000000000000000000000 --- a/lnet/lnet/portals.xcode/project.pbxproj +++ /dev/null @@ -1,430 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = portals; - refType = 4; - sourceTree = "<group>"; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = "<group>"; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 19A778270730EACD00846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = module.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778280730EACD00846375 = { - fileRef = 19A778270730EACD00846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7782B0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-errno.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782C0730EB8400846375 = { - fileEncoding = 30; - isa = 
PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-ni.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782D0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-wrap.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782E0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-eq.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782F0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-init.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778300730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-md.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778310730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-me.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778320730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-move.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778330730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-msg.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778340730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-ni.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778350730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-pid.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778360730EB8400846375 = { - fileRef = 19A7782B0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778370730EB8400846375 = { - fileRef = 19A7782C0730EB8400846375; - isa = 
PBXBuildFile; - settings = { - }; - }; - 19A778380730EB8400846375 = { - fileRef = 19A7782D0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778390730EB8400846375 = { - fileRef = 19A7782E0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783A0730EB8400846375 = { - fileRef = 19A7782F0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783B0730EB8400846375 = { - fileRef = 19A778300730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783C0730EB8400846375 = { - fileRef = 19A778310730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783D0730EB8400846375 = { - fileRef = 19A778320730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783E0730EB8400846375 = { - fileRef = 19A778330730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783F0730EB8400846375 = { - fileRef = 19A778340730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778400730EB8400846375 = { - fileRef = 19A778350730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = "<group>"; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 19A7782B0730EB8400846375, - 19A7782C0730EB8400846375, - 19A7782D0730EB8400846375, - 19A7782E0730EB8400846375, - 19A7782F0730EB8400846375, - 19A778300730EB8400846375, - 19A778310730EB8400846375, - 19A778320730EB8400846375, - 19A778330730EB8400846375, - 19A778340730EB8400846375, - 19A778350730EB8400846375, - 19A778270730EACD00846375, - ); - isa = PBXGroup; - name = Source; - path = ""; - refType = 4; - sourceTree = "<group>"; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, 
- 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = ../include; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.portals.portals; - MODULE_START = portals_start; - MODULE_STOP = portals_stop; - MODULE_VERSION = 1.0.1; - OTHER_CFLAGS = "-D__KERNEL__"; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = portals; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = portals; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = portals; - productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 19A778280730EACD00846375, - 19A778360730EB8400846375, - 19A778370730EB8400846375, - 19A778380730EB8400846375, - 19A778390730EB8400846375, - 19A7783A0730EB8400846375, - 19A7783B0730EB8400846375, - 19A7783C0730EB8400846375, - 19A7783D0730EB8400846375, - 19A7783E0730EB8400846375, - 19A7783F0730EB8400846375, - 19A778400730EB8400846375, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = "<group>"; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = portals.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c deleted file mode 100644 index 7d3e66e29ea1bd021b2332c07fbe4ed7ab47271d..0000000000000000000000000000000000000000 --- a/lnet/lnet/router.c +++ /dev/null @@ -1,1072 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <lnet/lib-lnet.h> - -#if defined(__KERNEL__) && defined(LNET_ROUTER) - -static char *forwarding = ""; -CFS_MODULE_PARM(forwarding, "s", charp, 0444, - "Explicitly enable/disable forwarding between networks"); - -static int tiny_router_buffers = 512; -CFS_MODULE_PARM(tiny_router_buffers, "i", int, 0444, - "# of 0 payload messages to buffer in the router"); -static int small_router_buffers = 256; -CFS_MODULE_PARM(small_router_buffers, "i", int, 0444, - "# of small (1 page) messages to buffer in the router"); -static int large_router_buffers = 32; -CFS_MODULE_PARM(large_router_buffers, "i", int, 0444, - "# of large messages to buffer in the router"); - -static int auto_down = 1; -CFS_MODULE_PARM(auto_down, "i", int, 0444, - "Automatically mark peers down on comms error"); - -static int check_routers_before_use = 0; -CFS_MODULE_PARM(check_routers_before_use, "i", int, 0444, - "Assume routers are down and ping them before use"); - -static int dead_router_check_interval = 0; -CFS_MODULE_PARM(dead_router_check_interval, "i", int, 0444, - "Seconds between dead router health checks (<= 0 to disable)"); - -static int live_router_check_interval = 0; -CFS_MODULE_PARM(live_router_check_interval, "i", int, 0444, - "Seconds between live router health checks (<= 0 to disable)"); - -static int router_ping_timeout = 50; -CFS_MODULE_PARM(router_ping_timeout, "i", int, 0444, - "Seconds to wait for the reply to a router health query"); - -int -lnet_peers_start_down(void) -{ - return check_routers_before_use; -} - -void -lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, time_t when) -{ - if (when < lp->lp_timestamp) { /* out of date information */ - CDEBUG(D_NET, "Out of date\n"); - return; - } - - lp->lp_timestamp = when; /* update timestamp */ - lp->lp_ping_deadline = 0; /* disable ping timeout */ - - if (lp->lp_alive_count != 0 && /* got old news */ - (!lp->lp_alive) == (!alive)) { /* new date for old news */ - 
CDEBUG(D_NET, "Old news\n"); - return; - } - - /* Flag that notification is outstanding */ - - lp->lp_alive_count++; - lp->lp_alive = !(!alive); /* 1 bit! */ - lp->lp_notify = 1; - lp->lp_notifylnd |= notifylnd; - - CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive); -} - -void -lnet_do_notify (lnet_peer_t *lp) -{ - lnet_ni_t *ni = lp->lp_ni; - int alive; - int notifylnd; - - LNET_LOCK(); - - /* Notify only in 1 thread at any time to ensure ordered notification. - * NB individual events can be missed; the only guarantee is that you - * always get the most recent news */ - - if (lp->lp_notifying) { - LNET_UNLOCK(); - return; - } - - lp->lp_notifying = 1; - - while (lp->lp_notify) { - alive = lp->lp_alive; - notifylnd = lp->lp_notifylnd; - - lp->lp_notifylnd = 0; - lp->lp_notify = 0; - - if (notifylnd && ni->ni_lnd->lnd_notify != NULL) { - LNET_UNLOCK(); - - /* A new notification could happen now; I'll handle it - * when control returns to me */ - - (ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive); - - LNET_LOCK(); - } - } - - lp->lp_notifying = 0; - - LNET_UNLOCK(); -} - -int -lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, time_t when) -{ - lnet_peer_t *lp = NULL; - time_t now = cfs_time_current_sec(); - - LASSERT (!in_interrupt ()); - - CDEBUG (D_NET, "%s notifying %s: %s\n", - (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), - alive ? "up" : "down"); - - if (ni != NULL && - LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) { - CWARN ("Ignoring notification of %s %s by %s (different net)\n", - libcfs_nid2str(nid), alive ? "birth" : "death", - libcfs_nid2str(ni->ni_nid)); - return -EINVAL; - } - - /* can't do predictions... */ - if (when > now) { - CWARN ("Ignoring prediction from %s of %s %s " - "%ld seconds in the future\n", - (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid), - libcfs_nid2str(nid), alive ? 
"up" : "down", - when - now); - return -EINVAL; - } - - if (ni != NULL && !alive && /* LND telling me she's down */ - !auto_down) { /* auto-down disabled */ - CDEBUG(D_NET, "Auto-down disabled\n"); - return 0; - } - - LNET_LOCK(); - - lp = lnet_find_peer_locked(nid); - if (lp == NULL) { - /* nid not found */ - LNET_UNLOCK(); - CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid)); - return 0; - } - - lnet_notify_locked(lp, ni == NULL, alive, when); - - LNET_UNLOCK(); - - lnet_do_notify(lp); - - LNET_LOCK(); - - lnet_peer_decref_locked(lp); - - LNET_UNLOCK(); - return 0; -} -EXPORT_SYMBOL(lnet_notify); - -#else - -int -lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, time_t when) -{ - return -EOPNOTSUPP; -} - -#endif - -static void -lnet_rtr_addref_locked(lnet_peer_t *lp) -{ - LASSERT (lp->lp_refcount > 0); - LASSERT (lp->lp_rtr_refcount >= 0); - - lp->lp_rtr_refcount++; - if (lp->lp_rtr_refcount == 1) { - struct list_head *pos; - - /* a simple insertion sort */ - list_for_each_prev(pos, &the_lnet.ln_routers) { - lnet_peer_t *rtr = list_entry(pos, lnet_peer_t, - lp_rtr_list); - - if (rtr->lp_nid < lp->lp_nid) - break; - } - - list_add(&lp->lp_rtr_list, pos); - /* addref for the_lnet.ln_routers */ - lnet_peer_addref_locked(lp); - the_lnet.ln_routers_version++; - } -} - -static void -lnet_rtr_decref_locked(lnet_peer_t *lp) -{ - LASSERT (lp->lp_refcount > 0); - LASSERT (lp->lp_rtr_refcount > 0); - - lp->lp_rtr_refcount--; - if (lp->lp_rtr_refcount == 0) { - list_del(&lp->lp_rtr_list); - /* decref for the_lnet.ln_routers */ - lnet_peer_decref_locked(lp); - the_lnet.ln_routers_version++; - } -} - -lnet_remotenet_t * -lnet_find_net_locked (__u32 net) -{ - lnet_remotenet_t *rnet; - struct list_head *tmp; - - LASSERT (!the_lnet.ln_shutdown); - - list_for_each (tmp, &the_lnet.ln_remote_nets) { - rnet = list_entry(tmp, lnet_remotenet_t, lrn_list); - - if (rnet->lrn_net == net) - return rnet; - } - return NULL; -} - -int -lnet_add_route (__u32 net, unsigned int hops, 
lnet_nid_t gateway) -{ - struct list_head zombies; - struct list_head *e; - lnet_remotenet_t *rnet; - lnet_remotenet_t *rnet2; - lnet_route_t *route; - lnet_route_t *route2; - lnet_ni_t *ni; - int add_route; - int rc; - - CDEBUG(D_NET, "Add route: net %s hops %u gw %s\n", - libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - - if (gateway == LNET_NID_ANY || - LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND || - net == LNET_NIDNET(LNET_NID_ANY) || - LNET_NETTYP(net) == LOLND || - LNET_NIDNET(gateway) == net || - hops < 1 || hops > 255) - return (-EINVAL); - - if (lnet_islocalnet(net)) /* it's a local network */ - return 0; /* ignore the route entry */ - - /* Assume net, route, all new */ - LIBCFS_ALLOC(route, sizeof(*route)); - LIBCFS_ALLOC(rnet, sizeof(*rnet)); - if (route == NULL || rnet == NULL) { - CERROR("Out of memory creating route %s %d %s\n", - libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - if (route != NULL) - LIBCFS_FREE(route, sizeof(*route)); - if (rnet != NULL) - LIBCFS_FREE(rnet, sizeof(*rnet)); - return -ENOMEM; - } - - INIT_LIST_HEAD(&rnet->lrn_routes); - rnet->lrn_net = net; - rnet->lrn_hops = hops; - - LNET_LOCK(); - - rc = lnet_nid2peer_locked(&route->lr_gateway, gateway); - if (rc != 0) { - LNET_UNLOCK(); - - LIBCFS_FREE(route, sizeof(*route)); - LIBCFS_FREE(rnet, sizeof(*rnet)); - - if (rc == -EHOSTUNREACH) /* gateway is not on a local net */ - return 0; /* ignore the route entry */ - - CERROR("Error %d creating route %s %d %s\n", rc, - libcfs_net2str(net), hops, libcfs_nid2str(gateway)); - return rc; - } - - LASSERT (!the_lnet.ln_shutdown); - CFS_INIT_LIST_HEAD(&zombies); - - rnet2 = lnet_find_net_locked(net); - if (rnet2 == NULL) { - /* new network */ - list_add_tail(&rnet->lrn_list, &the_lnet.ln_remote_nets); - rnet2 = rnet; - } - - if (hops > rnet2->lrn_hops) { - /* New route is longer; ignore it */ - add_route = 0; - } else if (hops < rnet2->lrn_hops) { - /* new route supercedes all currently known routes to this - * net */ - 
list_add(&zombies, &rnet2->lrn_routes); - list_del_init(&rnet2->lrn_routes); - add_route = 1; - } else { - add_route = 1; - /* New route has the same hopcount as existing routes; search - * for a duplicate route (it's a NOOP if it is) */ - list_for_each (e, &rnet2->lrn_routes) { - route2 = list_entry(e, lnet_route_t, lr_list); - - if (route2->lr_gateway == route->lr_gateway) { - add_route = 0; - break; - } - - /* our loopups must be true */ - LASSERT (route2->lr_gateway->lp_nid != gateway); - } - } - - if (add_route) { - ni = route->lr_gateway->lp_ni; - lnet_ni_addref_locked(ni); - - LASSERT (rc == 0); - list_add_tail(&route->lr_list, &rnet2->lrn_routes); - the_lnet.ln_remote_nets_version++; - - lnet_rtr_addref_locked(route->lr_gateway); - - LNET_UNLOCK(); - - /* XXX Assume alive */ - if (ni->ni_lnd->lnd_notify != NULL) - (ni->ni_lnd->lnd_notify)(ni, gateway, 1); - - lnet_ni_decref(ni); - } else { - lnet_peer_decref_locked(route->lr_gateway); - LNET_UNLOCK(); - LIBCFS_FREE(route, sizeof(*route)); - } - - if (rnet != rnet2) - LIBCFS_FREE(rnet, sizeof(*rnet)); - - while (!list_empty(&zombies)) { - route = list_entry(zombies.next, lnet_route_t, lr_list); - list_del(&route->lr_list); - - LNET_LOCK(); - lnet_peer_decref_locked(route->lr_gateway); - LNET_UNLOCK(); - LIBCFS_FREE(route, sizeof(*route)); - } - - return rc; -} - -int -lnet_check_routes (void) -{ - lnet_remotenet_t *rnet; - lnet_route_t *route; - lnet_route_t *route2; - struct list_head *e1; - struct list_head *e2; - - LNET_LOCK(); - - list_for_each (e1, &the_lnet.ln_remote_nets) { - rnet = list_entry(e1, lnet_remotenet_t, lrn_list); - - route2 = NULL; - list_for_each (e2, &rnet->lrn_routes) { - route = list_entry(e2, lnet_route_t, lr_list); - - if (route2 == NULL) - route2 = route; - else if (route->lr_gateway->lp_ni != - route2->lr_gateway->lp_ni) { - LNET_UNLOCK(); - - CERROR("Routes to %s via %s and %s not supported\n", - libcfs_net2str(rnet->lrn_net), - libcfs_nid2str(route->lr_gateway->lp_nid), - 
libcfs_nid2str(route2->lr_gateway->lp_nid)); - return -EINVAL; - } - } - } - - LNET_UNLOCK(); - return 0; -} - -int -lnet_del_route (__u32 net, lnet_nid_t gw_nid) -{ - lnet_remotenet_t *rnet; - lnet_route_t *route; - struct list_head *e1; - struct list_head *e2; - int rc = -ENOENT; - - CDEBUG(D_NET, "Del route: net %s : gw %s\n", - libcfs_net2str(net), libcfs_nid2str(gw_nid)); - - /* NB Caller may specify either all routes via the given gateway - * or a specific route entry actual NIDs) */ - - again: - LNET_LOCK(); - - list_for_each (e1, &the_lnet.ln_remote_nets) { - rnet = list_entry(e1, lnet_remotenet_t, lrn_list); - - if (!(net == LNET_NIDNET(LNET_NID_ANY) || - net == rnet->lrn_net)) - continue; - - list_for_each (e2, &rnet->lrn_routes) { - route = list_entry(e2, lnet_route_t, lr_list); - - if (!(gw_nid == LNET_NID_ANY || - gw_nid == route->lr_gateway->lp_nid)) - continue; - - list_del(&route->lr_list); - the_lnet.ln_remote_nets_version++; - - if (list_empty(&rnet->lrn_routes)) - list_del(&rnet->lrn_list); - else - rnet = NULL; - - lnet_rtr_decref_locked(route->lr_gateway); - lnet_peer_decref_locked(route->lr_gateway); - LNET_UNLOCK(); - - LIBCFS_FREE(route, sizeof (*route)); - - if (rnet != NULL) - LIBCFS_FREE(rnet, sizeof(*rnet)); - - rc = 0; - goto again; - } - } - - LNET_UNLOCK(); - return rc; -} - -void -lnet_destroy_routes (void) -{ - lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY); -} - -int -lnet_get_route (int idx, __u32 *net, __u32 *hops, - lnet_nid_t *gateway, __u32 *alive) -{ - struct list_head *e1; - struct list_head *e2; - lnet_remotenet_t *rnet; - lnet_route_t *route; - - LNET_LOCK(); - - list_for_each (e1, &the_lnet.ln_remote_nets) { - rnet = list_entry(e1, lnet_remotenet_t, lrn_list); - - list_for_each (e2, &rnet->lrn_routes) { - route = list_entry(e2, lnet_route_t, lr_list); - - if (idx-- == 0) { - *net = rnet->lrn_net; - *hops = rnet->lrn_hops; - *gateway = route->lr_gateway->lp_nid; - *alive = route->lr_gateway->lp_alive; - 
LNET_UNLOCK(); - return 0; - } - } - } - - LNET_UNLOCK(); - return -ENOENT; -} - -#if defined(__KERNEL__) && defined(LNET_ROUTER) -static void -lnet_router_checker_event (lnet_event_t *event) -{ - /* CAVEAT EMPTOR: I'm called with LNET_LOCKed and I'm not allowed to - * drop it (that's how come I see _every_ event, even ones that would - * overflow my EQ) */ - lnet_peer_t *lp; - lnet_nid_t nid; - - if (event->unlinked) { - /* The router checker thread has unlinked the rc_md - * and exited. */ - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_UNLINKING); - the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKED; - mutex_up(&the_lnet.ln_rc_signal); - return; - } - - LASSERT (event->type == LNET_EVENT_SEND || - event->type == LNET_EVENT_REPLY); - - nid = (event->type == LNET_EVENT_SEND) ? - event->target.nid : event->initiator.nid; - - lp = lnet_find_peer_locked(nid); - if (lp == NULL) { - /* router may have been removed */ - CDEBUG(D_NET, "Router %s not found\n", libcfs_nid2str(nid)); - return; - } - - if (event->type == LNET_EVENT_SEND) /* re-enable another ping */ - lp->lp_ping_notsent = 0; - - if (lnet_isrouter(lp) && /* ignore if no longer a router */ - (event->status != 0 || - event->type == LNET_EVENT_REPLY)) { - - /* A successful REPLY means the router is up. If _any_ comms - * to the router fail I assume it's down (this will happen if - * we ping alive routers to try to detect router death before - * apps get burned). */ - - lnet_notify_locked(lp, 1, (event->status == 0), - cfs_time_current_sec()); - - /* The router checker will wake up very shortly and do the - * actual notification. - * XXX If 'lp' stops being a router before then, it will still - * have the notification pending!!! 
*/ - } - - /* This decref will NOT drop LNET_LOCK (it had to have 1 ref when it - * was in the peer table and I've not dropped the lock, so no-one else - * can have reduced the refcount) */ - LASSERT(lp->lp_refcount > 1); - - lnet_peer_decref_locked(lp); -} - -static int -lnet_router_checker(void *arg) -{ - static lnet_ping_info_t pinginfo; - - int rc; - lnet_handle_md_t mdh; - lnet_peer_t *rtr; - struct list_head *entry; - time_t now; - lnet_process_id_t rtr_id; - int secs; - - cfs_daemonize("router_checker"); - cfs_block_allsigs(); - - rtr_id.pid = LUSTRE_SRV_LNET_PID; - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - rc = LNetMDBind((lnet_md_t){.start = &pinginfo, - .length = sizeof(pinginfo), - .threshold = LNET_MD_THRESH_INF, - .options = LNET_MD_TRUNCATE, - .eq_handle = the_lnet.ln_rc_eqh}, - LNET_UNLINK, - &mdh); - - if (rc < 0) { - CERROR("Can't bind MD: %d\n", rc); - the_lnet.ln_rc_state = rc; - mutex_up(&the_lnet.ln_rc_signal); - return rc; - } - - LASSERT (rc == 0); - - the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING; - mutex_up(&the_lnet.ln_rc_signal); /* let my parent go */ - - while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) { - __u64 version; - - LNET_LOCK(); -rescan: - version = the_lnet.ln_routers_version; - - list_for_each (entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, lnet_peer_t, lp_rtr_list); - - lnet_peer_addref_locked(rtr); - - now = cfs_time_current_sec(); - - if (rtr->lp_ping_deadline != 0 && /* ping timed out? 
*/ - now > rtr->lp_ping_deadline) - lnet_notify_locked(rtr, 1, 0, now); - - LNET_UNLOCK(); - - /* Run any outstanding notificiations */ - lnet_do_notify(rtr); - - if (rtr->lp_alive) { - secs = live_router_check_interval; - } else { - secs = dead_router_check_interval; - } - if (secs <= 0) - secs = 0; - - if (secs != 0 && - !rtr->lp_ping_notsent && - now > rtr->lp_ping_timestamp + secs) { - CDEBUG(D_NET, "Check: %s\n", - libcfs_nid2str(rtr->lp_nid)); - - LNET_LOCK(); - rtr_id.nid = rtr->lp_nid; - rtr->lp_ping_notsent = 1; - rtr->lp_ping_timestamp = now; - - if (rtr->lp_ping_deadline == 0) - rtr->lp_ping_deadline = - now + router_ping_timeout; - - LNET_UNLOCK(); - - LNetGet(LNET_NID_ANY, mdh, rtr_id, - LNET_RESERVED_PORTAL, - LNET_PROTO_PING_MATCHBITS, 0); - } - - LNET_LOCK(); - lnet_peer_decref_locked(rtr); - - if (version != the_lnet.ln_routers_version) { - /* the routers list has changed */ - goto rescan; - } - } - - LNET_UNLOCK(); - - /* Call cfs_pause() here always adds 1 to load average - * because kernel counts # active tasks as nr_running - * + nr_uninterruptible. 
*/ - set_current_state(CFS_TASK_INTERRUPTIBLE); - cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, - cfs_time_seconds(1)); - } - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_STOPTHREAD); - the_lnet.ln_rc_state = LNET_RC_STATE_UNLINKING; - - rc = LNetMDUnlink(mdh); - LASSERT (rc == 0); - - /* The unlink event callback will signal final completion */ - - return 0; -} - - -void -lnet_wait_known_routerstate(void) -{ - lnet_peer_t *rtr; - struct list_head *entry; - int all_known; - - for (;;) { - LNET_LOCK(); - - all_known = 1; - list_for_each (entry, &the_lnet.ln_routers) { - rtr = list_entry(entry, lnet_peer_t, lp_rtr_list); - - if (rtr->lp_alive_count == 0) { - all_known = 0; - break; - } - } - - LNET_UNLOCK(); - - if (all_known) - return; - - cfs_pause(cfs_time_seconds(1)); - } -} - -void -lnet_router_checker_stop(void) -{ - int rc; - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING || - the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN) - return; - - the_lnet.ln_rc_state = LNET_RC_STATE_STOPTHREAD; - /* block until event callback signals exit */ - mutex_down(&the_lnet.ln_rc_signal); - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_UNLINKED); - - rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT (rc == 0); - - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; -} - -int -lnet_router_checker_start(void) -{ - int rc; - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN); - - if (check_routers_before_use && - dead_router_check_interval <= 0) { - LCONSOLE_ERROR("'dead_router_check_interval' must be set if " - "'check_routers_before_use' is set\n"); - return -EINVAL; - } - - if (live_router_check_interval <= 0 && - dead_router_check_interval <= 0) - return 0; - - init_mutex_locked(&the_lnet.ln_rc_signal); - - /* EQ size doesn't matter; the callback is guaranteed to get every - * event */ - rc = LNetEQAlloc(1, lnet_router_checker_event, - &the_lnet.ln_rc_eqh); - if (rc != 0) { - CERROR("Can't allocate EQ: 
%d\n", rc); - return -ENOMEM; - } - - rc = (int)cfs_kernel_thread(lnet_router_checker, NULL, 0); - if (rc < 0) { - CERROR("Can't start router checker thread: %d\n", rc); - goto failed; - } - - mutex_down(&the_lnet.ln_rc_signal); /* wait for checker to startup */ - - rc = the_lnet.ln_rc_state; - if (rc < 0) { - the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN; - goto failed; - } - - LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); - - if (check_routers_before_use) { - /* Note that a helpful side-effect of pinging all known routers - * at startup is that it makes them drop stale connections they - * may have to a previous instance of me. */ - lnet_wait_known_routerstate(); - } - - return 0; - - failed: - rc = LNetEQFree(the_lnet.ln_rc_eqh); - LASSERT (rc == 0); - return rc; -} - -void -lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages) -{ - int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]); - - while (--npages >= 0) - cfs_free_page(rb->rb_kiov[npages].kiov_page); - - LIBCFS_FREE(rb, sz); -} - -lnet_rtrbuf_t * -lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp) -{ - int npages = rbp->rbp_npages; - int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]); - struct page *page; - lnet_rtrbuf_t *rb; - int i; - - LIBCFS_ALLOC(rb, sz); - - rb->rb_pool = rbp; - - for (i = 0; i < npages; i++) { - page = cfs_alloc_page(CFS_ALLOC_ZERO | CFS_ALLOC_STD); - if (page == NULL) { - while (--i >= 0) - cfs_free_page(rb->rb_kiov[i].kiov_page); - - LIBCFS_FREE(rb, sz); - return NULL; - } - - rb->rb_kiov[i].kiov_len = CFS_PAGE_SIZE; - rb->rb_kiov[i].kiov_offset = 0; - rb->rb_kiov[i].kiov_page = page; - } - - return rb; -} - -void -lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp) -{ - int npages = rbp->rbp_npages; - int nbuffers = 0; - lnet_rtrbuf_t *rb; - - LASSERT (list_empty(&rbp->rbp_msgs)); - LASSERT (rbp->rbp_credits == rbp->rbp_nbuffers); - - while (!list_empty(&rbp->rbp_bufs)) { - LASSERT (rbp->rbp_credits > 0); - - rb = list_entry(rbp->rbp_bufs.next, - lnet_rtrbuf_t, rb_list); - 
list_del(&rb->rb_list); - lnet_destroy_rtrbuf(rb, npages); - nbuffers++; - } - - LASSERT (rbp->rbp_nbuffers == nbuffers); - LASSERT (rbp->rbp_credits == nbuffers); - - rbp->rbp_nbuffers = rbp->rbp_credits = 0; -} - -int -lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t *rbp, int nbufs) -{ - lnet_rtrbuf_t *rb; - int i; - - if (rbp->rbp_nbuffers != 0) { - LASSERT (rbp->rbp_nbuffers == nbufs); - return 0; - } - - for (i = 0; i < nbufs; i++) { - rb = lnet_new_rtrbuf(rbp); - - if (rb == NULL) { - CERROR("Failed to allocate %d router bufs of %d pages\n", - nbufs, rbp->rbp_npages); - return -ENOMEM; - } - - rbp->rbp_nbuffers++; - rbp->rbp_credits++; - rbp->rbp_mincredits++; - list_add(&rb->rb_list, &rbp->rbp_bufs); - - /* No allocation "under fire" */ - /* Otherwise we'd need code to schedule blocked msgs etc */ - LASSERT (!the_lnet.ln_routing); - } - - LASSERT (rbp->rbp_credits == nbufs); - return 0; -} - -void -lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages) -{ - CFS_INIT_LIST_HEAD(&rbp->rbp_msgs); - CFS_INIT_LIST_HEAD(&rbp->rbp_bufs); - - rbp->rbp_npages = npages; - rbp->rbp_credits = 0; - rbp->rbp_mincredits = 0; -} - -void -lnet_free_rtrpools(void) -{ - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[0]); - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[1]); - lnet_rtrpool_free_bufs(&the_lnet.ln_rtrpools[2]); -} - -void -lnet_init_rtrpools(void) -{ - int small_pages = 1; - int large_pages = (LNET_MTU + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; - - lnet_rtrpool_init(&the_lnet.ln_rtrpools[0], 0); - lnet_rtrpool_init(&the_lnet.ln_rtrpools[1], small_pages); - lnet_rtrpool_init(&the_lnet.ln_rtrpools[2], large_pages); -} - - -int -lnet_alloc_rtrpools(int im_a_router) -{ - int rc; - - if (!strcmp(forwarding, "")) { - /* not set either way */ - if (!im_a_router) - return 0; - } else if (!strcmp(forwarding, "disabled")) { - /* explicitly disabled */ - return 0; - } else if (!strcmp(forwarding, "enabled")) { - /* explicitly enabled */ - } else { - LCONSOLE_ERROR("'forwarding' not set to 
either " - "'enabled' or 'disabled'\n"); - return -EINVAL; - } - - if (tiny_router_buffers <= 0) { - LCONSOLE_ERROR("tiny_router_buffers=%d invalid when " - "routing enabled\n", tiny_router_buffers); - rc = -EINVAL; - goto failed; - } - - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[0], - tiny_router_buffers); - if (rc != 0) - goto failed; - - if (small_router_buffers <= 0) { - LCONSOLE_ERROR("small_router_buffers=%d invalid when " - "routing enabled\n", small_router_buffers); - rc = -EINVAL; - goto failed; - } - - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[1], - small_router_buffers); - if (rc != 0) - goto failed; - - if (large_router_buffers <= 0) { - LCONSOLE_ERROR("large_router_buffers=%d invalid when " - "routing enabled\n", large_router_buffers); - rc = -EINVAL; - goto failed; - } - - rc = lnet_rtrpool_alloc_bufs(&the_lnet.ln_rtrpools[2], - large_router_buffers); - if (rc != 0) - goto failed; - - LNET_LOCK(); - the_lnet.ln_routing = 1; - LNET_UNLOCK(); - - return 0; - - failed: - lnet_free_rtrpools(); - return rc; -} - -#else - -int -lnet_peers_start_down(void) -{ - return 0; -} - -void -lnet_router_checker_stop(void) -{ - return; -} - -int -lnet_router_checker_start(void) -{ - return 0; -} - -void -lnet_free_rtrpools (void) -{ -} - -void -lnet_init_rtrpools (void) -{ -} - -int -lnet_alloc_rtrpools (int im_a_arouter) -{ - return 0; -} - -#endif diff --git a/lnet/lnet/router_proc.c b/lnet/lnet/router_proc.c deleted file mode 100644 index 5be36b18d8c7a8abda0d01536907861de0b25a17..0000000000000000000000000000000000000000 --- a/lnet/lnet/router_proc.c +++ /dev/null @@ -1,1094 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_SUBSYSTEM S_LNET -#include <libcfs/libcfs.h> -#include <lnet/lib-lnet.h> - -#if defined(__KERNEL__) && defined(LNET_ROUTER) - -#include <linux/seq_file.h> -#include <linux/lustre_compat25.h> - -/* this is really lnet_proc.c */ - -#define LNET_PROC_STATS "sys/lnet/stats" -#define LNET_PROC_ROUTES "sys/lnet/routes" -#define LNET_PROC_ROUTERS "sys/lnet/routers" -#define LNET_PROC_PEERS "sys/lnet/peers" -#define LNET_PROC_BUFFERS "sys/lnet/buffers" -#define LNET_PROC_NIS "sys/lnet/nis" - -static int -lnet_router_proc_stats_read (char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - lnet_counters_t *ctrs; - int rc; - - *start = page; - *eof = 1; - if (off != 0) - return 0; - - LIBCFS_ALLOC(ctrs, sizeof(*ctrs)); - if (ctrs == NULL) - return -ENOMEM; - - LNET_LOCK(); - *ctrs = the_lnet.ln_counters; - LNET_UNLOCK(); - - rc = sprintf(page, - "%u %u %u %u %u %u %u "LPU64" "LPU64" "LPU64" "LPU64"\n", - ctrs->msgs_alloc, ctrs->msgs_max, - ctrs->errors, - ctrs->send_count, ctrs->recv_count, - ctrs->route_count, ctrs->drop_count, - ctrs->send_length, ctrs->recv_length, - ctrs->route_length, ctrs->drop_length); - - LIBCFS_FREE(ctrs, sizeof(*ctrs)); - return rc; -} - -static int -lnet_router_proc_stats_write(struct file 
*file, const char *ubuffer, - unsigned long count, void *data) -{ - LNET_LOCK(); - memset(&the_lnet.ln_counters, 0, sizeof(the_lnet.ln_counters)); - LNET_UNLOCK(); - - return (count); -} - -typedef struct { - __u64 lrsi_version; - lnet_remotenet_t *lrsi_net; - lnet_route_t *lrsi_route; - loff_t lrsi_off; -} lnet_route_seq_iterator_t; - -int -lnet_route_seq_seek (lnet_route_seq_iterator_t *lrsi, loff_t off) -{ - struct list_head *n; - struct list_head *r; - int rc; - loff_t here; - - if (off == 0) { - lrsi->lrsi_net = NULL; - lrsi->lrsi_route = NULL; - lrsi->lrsi_off = 0; - return 0; - } - - LNET_LOCK(); - - if (lrsi->lrsi_net != NULL && - lrsi->lrsi_version != the_lnet.ln_remote_nets_version) { - /* tables have changed */ - rc = -ESTALE; - goto out; - } - - if (lrsi->lrsi_net == NULL || lrsi->lrsi_off > off) { - /* search from start */ - n = the_lnet.ln_remote_nets.next; - r = NULL; - here = 1; - } else { - /* continue search */ - n = &lrsi->lrsi_net->lrn_list; - r = &lrsi->lrsi_route->lr_list; - here = lrsi->lrsi_off; - } - - lrsi->lrsi_version = the_lnet.ln_remote_nets_version; - lrsi->lrsi_off = off; - - while (n != &the_lnet.ln_remote_nets) { - lnet_remotenet_t *rnet = - list_entry(n, lnet_remotenet_t, lrn_list); - - if (r == NULL) - r = rnet->lrn_routes.next; - - while (r != &rnet->lrn_routes) { - lnet_route_t *re = - list_entry(r, lnet_route_t, - lr_list); - - if (here == off) { - lrsi->lrsi_net = rnet; - lrsi->lrsi_route = re; - rc = 0; - goto out; - } - - r = r->next; - here++; - } - - r = NULL; - n = n->next; - } - - lrsi->lrsi_net = NULL; - lrsi->lrsi_route = NULL; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_route_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_route_seq_iterator_t *lrsi; - int rc; - - LIBCFS_ALLOC(lrsi, sizeof(*lrsi)); - if (lrsi == NULL) - return NULL; - - lrsi->lrsi_net = NULL; - rc = lnet_route_seq_seek(lrsi, *pos); - if (rc == 0) - return lrsi; - - LIBCFS_FREE(lrsi, sizeof(*lrsi)); - return 
NULL; -} - -static void -lnet_route_seq_stop (struct seq_file *s, void *iter) -{ - lnet_route_seq_iterator_t *lrsi = iter; - - if (lrsi != NULL) - LIBCFS_FREE(lrsi, sizeof(*lrsi)); -} - -static void * -lnet_route_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_route_seq_iterator_t *lrsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_route_seq_seek(lrsi, next); - if (rc != 0) { - LIBCFS_FREE(lrsi, sizeof(*lrsi)); - return NULL; - } - - *pos = next; - return lrsi; -} - -static int -lnet_route_seq_show (struct seq_file *s, void *iter) -{ - lnet_route_seq_iterator_t *lrsi = iter; - __u32 net; - unsigned int hops; - lnet_nid_t nid; - int alive; - - if (lrsi->lrsi_off == 0) { - seq_printf(s, "Routing %s\n", - the_lnet.ln_routing ? "enabled" : "disabled"); - seq_printf(s, "%-8s %4s %7s %s\n", - "net", "hops", "state", "router"); - return 0; - } - - LASSERT (lrsi->lrsi_net != NULL); - LASSERT (lrsi->lrsi_route != NULL); - - LNET_LOCK(); - - if (lrsi->lrsi_version != the_lnet.ln_remote_nets_version) { - LNET_UNLOCK(); - return -ESTALE; - } - - net = lrsi->lrsi_net->lrn_net; - hops = lrsi->lrsi_net->lrn_hops; - nid = lrsi->lrsi_route->lr_gateway->lp_nid; - alive = lrsi->lrsi_route->lr_gateway->lp_alive; - - LNET_UNLOCK(); - - seq_printf(s, "%-8s %4u %7s %s\n", libcfs_net2str(net), hops, - alive ? 
"up" : "down", libcfs_nid2str(nid)); - return 0; -} - -static struct seq_operations lnet_routes_sops = { - .start = lnet_route_seq_start, - .stop = lnet_route_seq_stop, - .next = lnet_route_seq_next, - .show = lnet_route_seq_show, -}; - -static int -lnet_route_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_routes_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_routes_fops = { - .owner = THIS_MODULE, - .open = lnet_route_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -typedef struct { - __u64 lrtrsi_version; - lnet_peer_t *lrtrsi_router; - loff_t lrtrsi_off; -} lnet_router_seq_iterator_t; - -int -lnet_router_seq_seek (lnet_router_seq_iterator_t *lrtrsi, loff_t off) -{ - struct list_head *r; - lnet_peer_t *lp; - int rc; - loff_t here; - - if (off == 0) { - lrtrsi->lrtrsi_router = NULL; - lrtrsi->lrtrsi_off = 0; - return 0; - } - - LNET_LOCK(); - - lp = lrtrsi->lrtrsi_router; - - if (lp != NULL && - lrtrsi->lrtrsi_version != the_lnet.ln_routers_version) { - /* tables have changed */ - rc = -ESTALE; - goto out; - } - - if (lp == NULL || lrtrsi->lrtrsi_off > off) { - /* search from start */ - r = the_lnet.ln_routers.next; - here = 1; - } else { - /* continue search */ - r = &lp->lp_rtr_list; - here = lrtrsi->lrtrsi_off; - } - - lrtrsi->lrtrsi_version = the_lnet.ln_routers_version; - lrtrsi->lrtrsi_off = off; - - while (r != &the_lnet.ln_routers) { - lnet_peer_t *rtr = list_entry(r, - lnet_peer_t, - lp_rtr_list); - - if (here == off) { - lrtrsi->lrtrsi_router = rtr; - rc = 0; - goto out; - } - - r = r->next; - here++; - } - - lrtrsi->lrtrsi_router = NULL; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_router_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_router_seq_iterator_t *lrtrsi; - int rc; - - 
LIBCFS_ALLOC(lrtrsi, sizeof(*lrtrsi)); - if (lrtrsi == NULL) - return NULL; - - lrtrsi->lrtrsi_router = NULL; - rc = lnet_router_seq_seek(lrtrsi, *pos); - if (rc == 0) - return lrtrsi; - - LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi)); - return NULL; -} - -static void -lnet_router_seq_stop (struct seq_file *s, void *iter) -{ - lnet_router_seq_iterator_t *lrtrsi = iter; - - if (lrtrsi != NULL) - LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi)); -} - -static void * -lnet_router_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_router_seq_iterator_t *lrtrsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_router_seq_seek(lrtrsi, next); - if (rc != 0) { - LIBCFS_FREE(lrtrsi, sizeof(*lrtrsi)); - return NULL; - } - - *pos = next; - return lrtrsi; -} - -static int -lnet_router_seq_show (struct seq_file *s, void *iter) -{ - lnet_router_seq_iterator_t *lrtrsi = iter; - lnet_peer_t *lp; - lnet_nid_t nid; - int alive; - int nrefs; - int nrtrrefs; - - if (lrtrsi->lrtrsi_off == 0) { - seq_printf(s, "%-4s %7s %9s %6s %12s %s\n", - "ref", "rtr_ref", "alive_cnt", "state", "last_ping", "router"); - return 0; - } - - lp = lrtrsi->lrtrsi_router; - LASSERT (lp != NULL); - - LNET_LOCK(); - - if (lrtrsi->lrtrsi_version != the_lnet.ln_routers_version) { - LNET_UNLOCK(); - return -ESTALE; - } - - nrefs = lp->lp_refcount; - nrtrrefs = lp->lp_rtr_refcount; - nid = lp->lp_nid; - alive = lp->lp_alive; - - LNET_UNLOCK(); - - seq_printf(s, - "%-4d %7d %9d %6s %12lu %s\n", - nrefs, nrtrrefs, - lp->lp_alive_count, - alive ? 
"up" : "down", - lp->lp_ping_timestamp, - libcfs_nid2str(nid)); - return 0; -} - -static struct seq_operations lnet_routers_sops = { - .start = lnet_router_seq_start, - .stop = lnet_router_seq_stop, - .next = lnet_router_seq_next, - .show = lnet_router_seq_show, -}; - -static int -lnet_router_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_routers_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_routers_fops = { - .owner = THIS_MODULE, - .open = lnet_router_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -typedef struct { - unsigned long long lpsi_version; - int lpsi_idx; - lnet_peer_t *lpsi_peer; - loff_t lpsi_off; -} lnet_peer_seq_iterator_t; - -int -lnet_peer_seq_seek (lnet_peer_seq_iterator_t *lpsi, loff_t off) -{ - int idx; - struct list_head *p; - loff_t here; - int rc; - - if (off == 0) { - lpsi->lpsi_idx = 0; - lpsi->lpsi_peer = NULL; - lpsi->lpsi_off = 0; - return 0; - } - - LNET_LOCK(); - - if (lpsi->lpsi_peer != NULL && - lpsi->lpsi_version != the_lnet.ln_peertable_version) { - /* tables have changed */ - rc = -ESTALE; - goto out; - } - - if (lpsi->lpsi_peer == NULL || - lpsi->lpsi_off > off) { - /* search from start */ - idx = 0; - p = NULL; - here = 1; - } else { - /* continue search */ - idx = lpsi->lpsi_idx; - p = &lpsi->lpsi_peer->lp_hashlist; - here = lpsi->lpsi_off; - } - - lpsi->lpsi_version = the_lnet.ln_peertable_version; - lpsi->lpsi_off = off; - - while (idx < LNET_PEER_HASHSIZE) { - if (p == NULL) - p = the_lnet.ln_peer_hash[idx].next; - - while (p != &the_lnet.ln_peer_hash[idx]) { - lnet_peer_t *lp = list_entry(p, lnet_peer_t, - lp_hashlist); - - if (here == off) { - lpsi->lpsi_idx = idx; - lpsi->lpsi_peer = lp; - rc = 0; - goto out; - } - - here++; - p = lp->lp_hashlist.next; - } - - p = NULL; - idx++; - } - - 
lpsi->lpsi_idx = 0; - lpsi->lpsi_peer = NULL; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_peer_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_peer_seq_iterator_t *lpsi; - int rc; - - LIBCFS_ALLOC(lpsi, sizeof(*lpsi)); - if (lpsi == NULL) - return NULL; - - lpsi->lpsi_idx = 0; - lpsi->lpsi_peer = NULL; - rc = lnet_peer_seq_seek(lpsi, *pos); - if (rc == 0) - return lpsi; - - LIBCFS_FREE(lpsi, sizeof(*lpsi)); - return NULL; -} - -static void -lnet_peer_seq_stop (struct seq_file *s, void *iter) -{ - lnet_peer_seq_iterator_t *lpsi = iter; - - if (lpsi != NULL) - LIBCFS_FREE(lpsi, sizeof(*lpsi)); -} - -static void * -lnet_peer_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_peer_seq_iterator_t *lpsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_peer_seq_seek(lpsi, next); - if (rc != 0) { - LIBCFS_FREE(lpsi, sizeof(*lpsi)); - return NULL; - } - - *pos = next; - return lpsi; -} - -static int -lnet_peer_seq_show (struct seq_file *s, void *iter) -{ - lnet_peer_seq_iterator_t *lpsi = iter; - lnet_peer_t *lp; - lnet_nid_t nid; - int maxcr; - int mintxcr; - int txcr; - int minrtrcr; - int rtrcr; - int alive; - int txqnob; - int nrefs; - - if (lpsi->lpsi_off == 0) { - seq_printf(s, "%-24s %4s %5s %5s %5s %5s %5s %5s %s\n", - "nid", "refs", "state", "max", - "rtr", "min", "tx", "min", "queue"); - return 0; - } - - LASSERT (lpsi->lpsi_peer != NULL); - - LNET_LOCK(); - - if (lpsi->lpsi_version != the_lnet.ln_peertable_version) { - LNET_UNLOCK(); - return -ESTALE; - } - - lp = lpsi->lpsi_peer; - - nid = lp->lp_nid; - maxcr = lp->lp_ni->ni_peertxcredits; - txcr = lp->lp_txcredits; - mintxcr = lp->lp_mintxcredits; - rtrcr = lp->lp_rtrcredits; - minrtrcr = lp->lp_minrtrcredits; - alive = lp->lp_alive; - txqnob = lp->lp_txqnob; - nrefs = lp->lp_refcount; - - LNET_UNLOCK(); - - seq_printf(s, "%-24s %4d %5s %5d %5d %5d %5d %5d %d\n", - libcfs_nid2str(nid), nrefs, alive ? 
"up" : "down", - maxcr, rtrcr, minrtrcr, txcr, mintxcr, txqnob); - return 0; -} - -static struct seq_operations lnet_peer_sops = { - .start = lnet_peer_seq_start, - .stop = lnet_peer_seq_stop, - .next = lnet_peer_seq_next, - .show = lnet_peer_seq_show, -}; - -static int -lnet_peer_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_peer_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_peer_fops = { - .owner = THIS_MODULE, - .open = lnet_peer_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -typedef struct { - int lbsi_idx; - loff_t lbsi_off; -} lnet_buffer_seq_iterator_t; - -int -lnet_buffer_seq_seek (lnet_buffer_seq_iterator_t *lbsi, loff_t off) -{ - int idx; - loff_t here; - int rc; - - if (off == 0) { - lbsi->lbsi_idx = -1; - lbsi->lbsi_off = 0; - return 0; - } - - LNET_LOCK(); - - if (lbsi->lbsi_idx < 0 || - lbsi->lbsi_off > off) { - /* search from start */ - idx = 0; - here = 1; - } else { - /* continue search */ - idx = lbsi->lbsi_idx; - here = lbsi->lbsi_off; - } - - lbsi->lbsi_off = off; - - while (idx < LNET_NRBPOOLS) { - if (here == off) { - lbsi->lbsi_idx = idx; - rc = 0; - goto out; - } - here++; - idx++; - } - - lbsi->lbsi_idx = -1; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_buffer_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_buffer_seq_iterator_t *lbsi; - int rc; - - LIBCFS_ALLOC(lbsi, sizeof(*lbsi)); - if (lbsi == NULL) - return NULL; - - lbsi->lbsi_idx = -1; - rc = lnet_buffer_seq_seek(lbsi, *pos); - if (rc == 0) - return lbsi; - - LIBCFS_FREE(lbsi, sizeof(*lbsi)); - return NULL; -} - -static void -lnet_buffer_seq_stop (struct seq_file *s, void *iter) -{ - lnet_buffer_seq_iterator_t *lbsi = iter; - - if (lbsi != NULL) - LIBCFS_FREE(lbsi, sizeof(*lbsi)); -} - -static void * 
-lnet_buffer_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_buffer_seq_iterator_t *lbsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_buffer_seq_seek(lbsi, next); - if (rc != 0) { - LIBCFS_FREE(lbsi, sizeof(*lbsi)); - return NULL; - } - - *pos = next; - return lbsi; -} - -static int -lnet_buffer_seq_show (struct seq_file *s, void *iter) -{ - lnet_buffer_seq_iterator_t *lbsi = iter; - lnet_rtrbufpool_t *rbp; - int npages; - int nbuf; - int cr; - int mincr; - - if (lbsi->lbsi_off == 0) { - seq_printf(s, "%5s %5s %7s %7s\n", - "pages", "count", "credits", "min"); - return 0; - } - - LASSERT (lbsi->lbsi_idx >= 0 && lbsi->lbsi_idx < LNET_NRBPOOLS); - - LNET_LOCK(); - - rbp = &the_lnet.ln_rtrpools[lbsi->lbsi_idx]; - - npages = rbp->rbp_npages; - nbuf = rbp->rbp_nbuffers; - cr = rbp->rbp_credits; - mincr = rbp->rbp_mincredits; - - LNET_UNLOCK(); - - seq_printf(s, "%5d %5d %7d %7d\n", - npages, nbuf, cr, mincr); - return 0; -} - -static struct seq_operations lnet_buffer_sops = { - .start = lnet_buffer_seq_start, - .stop = lnet_buffer_seq_stop, - .next = lnet_buffer_seq_next, - .show = lnet_buffer_seq_show, -}; - -static int -lnet_buffer_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_buffer_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_buffers_fops = { - .owner = THIS_MODULE, - .open = lnet_buffer_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -typedef struct { - lnet_ni_t *lnsi_ni; - loff_t lnsi_off; -} lnet_ni_seq_iterator_t; - -int -lnet_ni_seq_seek (lnet_ni_seq_iterator_t *lnsi, loff_t off) -{ - struct list_head *n; - loff_t here; - int rc; - - if (off == 0) { - lnsi->lnsi_ni = NULL; - lnsi->lnsi_off = 0; - return 0; - } - - LNET_LOCK(); - - if (lnsi->lnsi_ni == NULL || - lnsi->lnsi_off > off) { - /* 
search from start */ - n = NULL; - here = 1; - } else { - /* continue search */ - n = &lnsi->lnsi_ni->ni_list; - here = lnsi->lnsi_off; - } - - lnsi->lnsi_off = off; - - if (n == NULL) - n = the_lnet.ln_nis.next; - - while (n != &the_lnet.ln_nis) { - if (here == off) { - lnsi->lnsi_ni = list_entry(n, lnet_ni_t, ni_list); - rc = 0; - goto out; - } - here++; - n = n->next; - } - - lnsi->lnsi_ni = NULL; - rc = -ENOENT; - out: - LNET_UNLOCK(); - return rc; -} - -static void * -lnet_ni_seq_start (struct seq_file *s, loff_t *pos) -{ - lnet_ni_seq_iterator_t *lnsi; - int rc; - - LIBCFS_ALLOC(lnsi, sizeof(*lnsi)); - if (lnsi == NULL) - return NULL; - - lnsi->lnsi_ni = NULL; - rc = lnet_ni_seq_seek(lnsi, *pos); - if (rc == 0) - return lnsi; - - LIBCFS_FREE(lnsi, sizeof(*lnsi)); - return NULL; -} - -static void -lnet_ni_seq_stop (struct seq_file *s, void *iter) -{ - lnet_ni_seq_iterator_t *lnsi = iter; - - if (lnsi != NULL) - LIBCFS_FREE(lnsi, sizeof(*lnsi)); -} - -static void * -lnet_ni_seq_next (struct seq_file *s, void *iter, loff_t *pos) -{ - lnet_ni_seq_iterator_t *lnsi = iter; - int rc; - loff_t next = *pos + 1; - - rc = lnet_ni_seq_seek(lnsi, next); - if (rc != 0) { - LIBCFS_FREE(lnsi, sizeof(*lnsi)); - return NULL; - } - - *pos = next; - return lnsi; -} - -static int -lnet_ni_seq_show (struct seq_file *s, void *iter) -{ - lnet_ni_seq_iterator_t *lnsi = iter; - lnet_ni_t *ni; - int maxtxcr; - int txcr; - int mintxcr; - int npeertxcr; - lnet_nid_t nid; - int nref; - - if (lnsi->lnsi_off == 0) { - seq_printf(s, "%-24s %4s %4s %5s %5s %5s\n", - "nid", "refs", "peer", "max", "tx", "min"); - return 0; - } - - LASSERT (lnsi->lnsi_ni != NULL); - - LNET_LOCK(); - - ni = lnsi->lnsi_ni; - - maxtxcr = ni->ni_maxtxcredits; - txcr = ni->ni_txcredits; - mintxcr = ni->ni_mintxcredits; - npeertxcr = ni->ni_peertxcredits; - nid = ni->ni_nid; - nref = ni->ni_refcount; - - LNET_UNLOCK(); - - seq_printf(s, "%-24s %4d %4d %5d %5d %5d\n", - libcfs_nid2str(nid), nref, - npeertxcr, maxtxcr, 
txcr, mintxcr); - return 0; -} - -static struct seq_operations lnet_ni_sops = { - .start = lnet_ni_seq_start, - .stop = lnet_ni_seq_stop, - .next = lnet_ni_seq_next, - .show = lnet_ni_seq_show, -}; - -static int -lnet_ni_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *sf; - int rc; - - rc = seq_open(file, &lnet_ni_sops); - if (rc == 0) { - sf = file->private_data; - sf->private = dp->data; - } - - return rc; -} - -static struct file_operations lnet_ni_fops = { - .owner = THIS_MODULE, - .open = lnet_ni_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -void -lnet_proc_init(void) -{ - struct proc_dir_entry *stats; - struct proc_dir_entry *routes; - struct proc_dir_entry *routers; - struct proc_dir_entry *peers; - - /* Initialize LNET_PROC_STATS */ - stats = create_proc_entry (LNET_PROC_STATS, 0644, NULL); - if (stats == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_STATS); - return; - } - - stats->data = NULL; - stats->read_proc = lnet_router_proc_stats_read; - stats->write_proc = lnet_router_proc_stats_write; - - /* Initialize LNET_PROC_ROUTES */ - routes = create_proc_entry (LNET_PROC_ROUTES, 0444, NULL); - if (routes == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_ROUTES); - return; - } - - routes->proc_fops = &lnet_routes_fops; - routes->data = NULL; - - /* Initialize LNET_PROC_ROUTERS */ - routers = create_proc_entry (LNET_PROC_ROUTERS, 0444, NULL); - if (routers == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_ROUTERS); - return; - } - - routers->proc_fops = &lnet_routers_fops; - routers->data = NULL; - - /* Initialize LNET_PROC_PEERS */ - peers = create_proc_entry (LNET_PROC_PEERS, 0444, NULL); - if (peers == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_PEERS); - return; - } - - peers->proc_fops = &lnet_peer_fops; - peers->data = NULL; - - /* Initialize LNET_PROC_BUFFERS */ - peers = 
create_proc_entry (LNET_PROC_BUFFERS, 0444, NULL); - if (peers == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_BUFFERS); - return; - } - - peers->proc_fops = &lnet_buffers_fops; - peers->data = NULL; - - /* Initialize LNET_PROC_NIS */ - peers = create_proc_entry (LNET_PROC_NIS, 0444, NULL); - if (peers == NULL) { - CERROR("couldn't create proc entry %s\n", LNET_PROC_NIS); - return; - } - - peers->proc_fops = &lnet_ni_fops; - peers->data = NULL; -} - -void -lnet_proc_fini(void) -{ - remove_proc_entry(LNET_PROC_STATS, 0); - remove_proc_entry(LNET_PROC_ROUTES, 0); - remove_proc_entry(LNET_PROC_ROUTERS, 0); - remove_proc_entry(LNET_PROC_PEERS, 0); - remove_proc_entry(LNET_PROC_BUFFERS, 0); - remove_proc_entry(LNET_PROC_NIS, 0); -} - -#else - -void -lnet_proc_init(void) -{ -} - -void -lnet_proc_fini(void) -{ -} - -#endif diff --git a/lnet/router/.cvsignore b/lnet/router/.cvsignore deleted file mode 100644 index 5ed596bbf5a8bc84d4ce3514700a939431df4da6..0000000000000000000000000000000000000000 --- a/lnet/router/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -.deps -Makefile -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/tests/.cvsignore b/lnet/tests/.cvsignore deleted file mode 100644 index e03413094ff2c07671a12625e94401893ce81af3..0000000000000000000000000000000000000000 --- a/lnet/tests/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -Makefile -.deps -.*.cmd -autoMakefile.in -autoMakefile -*.ko -*.mod.c -.*.flags -.tmp_versions -.depend diff --git a/lnet/tests/Makefile.in b/lnet/tests/Makefile.in deleted file mode 100644 index 5860c3e973f3143f3b1054f0c7f2a2630376bb58..0000000000000000000000000000000000000000 --- a/lnet/tests/Makefile.in +++ /dev/null @@ -1,14 +0,0 @@ -MODULES := pingsrv pingcli -#utcli utsrv -pingsrv-objs := ping_srv.o - -ifeq ($(PATCHLEVEL),6) -pingcli-objs := ping_cli.o -#utcli-objs := ut_cli.o -#utsrv-objs := ut_srv.o -else -ping%.c: ping_%.c - ln -sf $< $@ -endif - -@INCLUDE_RULES@ 
diff --git a/lnet/tests/arch-linux/ping.h b/lnet/tests/arch-linux/ping.h deleted file mode 100644 index 640100e255c0fb9a897f4fb760842451d79e41f1..0000000000000000000000000000000000000000 --- a/lnet/tests/arch-linux/ping.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef __LINUX_PING_H__ -#define __LINUX_PING_H__ - -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <linux/workqueue.h> -#else -#include <linux/tqueue.h> -#endif -#include <linux/wait.h> -#include <linux/smp_lock.h> -#include <linux/poll.h> - -#include <asm/unistd.h> -#include <asm/semaphore.h> - -#endif diff --git a/lnet/tests/arch-xnu/ping.h b/lnet/tests/arch-xnu/ping.h deleted file mode 100644 index bb1327686b20a40e8e0ce4751e576530011cc8c3..0000000000000000000000000000000000000000 --- a/lnet/tests/arch-xnu/ping.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __XNU_PING_H__ -#define __XNU_PING_H__ - -#include <mach/mach_types.h> -#include <arch-xnu/cfs_lock.h> -#include <arch-xnu/cfs_prim.h> - -#endif diff --git a/lnet/tests/autoMakefile.am b/lnet/tests/autoMakefile.am deleted file mode 100644 index f187255882bf3155ec34e38671dc0d4c720a678e..0000000000000000000000000000000000000000 --- a/lnet/tests/autoMakefile.am +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -if MODULES -if TESTS - -if LINUX -noinst_DATA := pingsrv$(KMODEXT) pingcli$(KMODEXT) -#noinst_DATA += utsrv$(KMODEXT) utcli$(KMODEXT) -endif - -if DARWIN -macos_PROGRAMS := pingcli -#macos_PROGRAMS := pingsrv - -pingcli_SOURCES := ping_cli.c - -pingcli_CFLAGS := $(EXTRA_KCFLAGS) -pingcli_LDFLAGS := $(EXTRA_KLDFLAGS) -pingcli_LDADD := $(EXTRA_KLIBS) - -#pingsrv_SOURCES := ping_srv.c - -#pingsrv_CFLAGS := $(EXTRA_KCFLAGS) -#pingsrv_LDFLAGS := $(EXTRA_KLDFLAGS) -#pingsrv_LDADD := $(EXTRA_KLIBS) - -plist_DATA := ping_cli/Info.plist -#plist_DATA := ping_srv/Info.plist - -install_data_hook := fix-kext-ownership -endif # Darwin - -endif # TEST -endif # MODULE -install-data-hook: $(install_data_hook) - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ pingsrv.c pingcli.c -DIST_SOURCES = ping_srv.c ping_cli.c ping.h -#ut_cli.c ut_srv.c ut.h diff --git a/lnet/tests/build-osx b/lnet/tests/build-osx deleted file mode 100644 index 5af66cbb55d0e2c05f9ae87ac7537e2318fedd6a..0000000000000000000000000000000000000000 --- a/lnet/tests/build-osx +++ /dev/null @@ -1,159 +0,0 @@ -#! /bin/sh - -if false ;then - OPTVAL=`getopt -o cb:l:s:k:L:v -n 'build-all' -- "$@"` -else - # XNU/BSD getopt is special... - OPTVAL=$(getopt cb:l:s:k:L:v "$@") -fi - -if [ $? != 0 ] -then - echo 'Usage: see source...' - exit 2 -fi - -eval set -- "$OPTVAL" - -cd $(dirname $0) - -#set -x - -b=$PWD # base directory -l=$b/build.log # where to log operations -s=$b/build.seq # build sequence -k=$b/kext.stage # where to place kexts after build - -load='' # list of kexts to load -clean=0 -verbose=0 - -while true ;do - case "$1" in - -c) - clean=1 - shift 1 - ;; - -v) - verbose=$(($verbose + 1)) - shift 1 - ;; - -b) - b=$2 - shift 2 - ;; - -l) - l=$2 - shift 2 - ;; - -s) - s=$2 - shift 2 - ;; - -k) - k=$2 - shift 2 - ;; - -L) - load=$2 - shift 2 - ;; - --) - shift - break - ;; - *) - echo "Internal error!" 
- exit 1 - ;; - esac -done - -echo > $l - -function message () -{ - local msg - - msg="$1" - echo $msg - echo $msg >> $l -} - -function abort () -{ - local msg - - msg=$1 - - message "$1" - exit 1 -} - -function configure_xcode () -{ - local path - local pfile - local module - - path=$PWD - module=$(basename $path) - pfile=$path/$module.xcode/project.pbxproj - if [ -r $pfile.template ] ;then - cpp \ - -P \ - -include $b/build-config \ - $pfile.template | \ - tail +2 > $pfile - else - abort "missing $pfile.template" - fi -} - -if [ x$clean != x0 ] ;then - echo "Removing..." - find $b/ -type d -name build - rm -fr $(find $b/ -type d -name build) - find $b/ -print0 | xargs -0 touch -fi - -cat $s | while read ;do - d=$REPLY - if [ x$d = x ] ;then - : # empty line. Do nothing - elif [ ${d:0:1} = '#' ] ;then - : # comment. Skip - else - cd $d || abort "Cannot cd to $d" - message "________ Building in $d __________" - #configure_xcode - if [ $verbose -gt 0 ] ;then - xcodebuild 2>&1 | tee -a $l - else - xcodebuild >> $l 2>&1 || abort "Build failure in $d. See $l" - fi - # tail -2 $l - cd $b - fi -done - -# copy all built kexts into $k -# sudo is used, because extensions are later chowned to root. 
-sudo rm -f ../include/arch -ln -s ../include/arch-xnu ../include/arch -sudo rm -fr $k || abort "Cannot clean $k" -mkdir $k || abort "Cannot create $k" -cp -R $(find ../ -name \*.kext -type d) $k || abort "Cannot stage kexts" -cd $k || abort "Cannot chdir to $k" -sudo chown -R root:wheel * || abort "Cannot chown kexts to root:wheel" - -if [ x$load != x ] ;then - cd $k - sudo kextload -r $k $load -else - sudo chown -R root:wheel * -fi -cd $b - -sync;sync;sync - diff --git a/lnet/tests/build.seq b/lnet/tests/build.seq deleted file mode 100644 index e6298b71f86572aa3ec814170639d7e8da9b4bd7..0000000000000000000000000000000000000000 --- a/lnet/tests/build.seq +++ /dev/null @@ -1,5 +0,0 @@ -../libcfs -../portals -../knals/socknal -./ping_cli -./ping_srv diff --git a/lnet/tests/ping.h b/lnet/tests/ping.h deleted file mode 100644 index 1dde8bcc861a43a68a3741f55c9f216eb578eeb9..0000000000000000000000000000000000000000 --- a/lnet/tests/ping.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef _KPING_INCLUDED -#define _KPING_INCLUDED - -#include <libcfs/portals_utils.h> -#include <lnet/lnet.h> - - -#define PTL_PING_IN_SIZE 256 // n packets per buffer -#define PTL_PING_IN_BUFFERS 2 // n fallback buffers - -#define PTL_PING_CLIENT 4 -#define PTL_PING_SERVER 5 - -#define PING_HEADER_MAGIC 0xDEADBEEF -#define PING_BULK_MAGIC 0xCAFEBABE - -#define PING_HEAD_BITS 0x00000001 -#define PING_BULK_BITS 0x00000002 -#define PING_IGNORE_BITS 0xFFFFFFFC - -#define PTL_PING_ACK 0x01 -#define PTL_PING_VERBOSE 0x02 -#define PTL_PING_VERIFY 0x04 -#define PTL_PING_PREALLOC 0x08 - - -#define NEXT_PRIMARY_BUFFER(index) \ - (((index + 1) >= PTL_PING_IN_BUFFERS) ? 
0 : (index + 1)) - -#define PDEBUG(str, err) \ - CERROR ("%s: error=(%d)\n", str, err) - - -/* Ping data to be passed via the ioctl to kernel space */ - -#if __KERNEL__ - -struct pingsrv_data { - lnet_handle_me_t me; - lnet_handle_eq_t eq; - void *in_buf; - lnet_process_id_t my_id; - lnet_process_id_t id_local; - lnet_md_t mdin; - lnet_md_t mdout; - lnet_handle_md_t mdin_h; - lnet_handle_md_t mdout_h; - lnet_event_t evnt; - cfs_task_t *tsk; -}; /* struct pingsrv_data */ - -struct pingcli_data { - - int count; - int size; - lnet_nid_t nid; - int timeout; - lnet_handle_me_t me; - lnet_handle_eq_t eq; - char *inbuf; - char *outbuf; - lnet_process_id_t myid; - lnet_process_id_t id_local; - lnet_process_id_t id_remote; - lnet_md_t md_in_head; - lnet_md_t md_out_head; - lnet_handle_md_t md_in_head_h; - lnet_handle_md_t md_out_head_h; - lnet_event_t ev; - cfs_task_t *tsk; -}; /* struct pingcli_data */ - - -#endif /* __KERNEL__ */ - -#endif /* _KPING_INCLUDED */ diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c deleted file mode 100644 index eaf83c05b0606f57f3bc1f18f6862cbc855d2fae..0000000000000000000000000000000000000000 --- a/lnet/tests/ping_cli.c +++ /dev/null @@ -1,300 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf <behlendorf1@llnl.gov> - * Kedar Sovani (kedar@calsoftinc.com) - * Amey Inamdar (amey@calsoftinc.com) - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_SUBSYSTEM S_PINGER - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include "ping.h" -/* int libcfs_debug = D_PING_CLI; */ - - -#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) - -#define MAX_TIME 100000 - -/* This should be enclosed in a structure */ - -static struct pingcli_data *client = NULL; - -static int count = 0; - -static void -pingcli_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. - */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = LNetMDUnlink (client->md_out_head_h))) - PDEBUG ("LNetMDUnlink", rc); - case 2: - if ((rc = LNetMDUnlink (client->md_in_head_h))) - PDEBUG ("LNetMDUnlink", rc); - - /* Free the event queue */ - if ((rc = LNetEQFree (client->eq))) - PDEBUG ("LNetEQFree", rc); - - if ((rc = LNetMEUnlink (client->me))) - PDEBUG ("LNetMEUnlink", rc); - case 3: - LNetNIFini(); - - case 4: - /* Free our buffers */ - if (client->outbuf != NULL) - LIBCFS_FREE (client->outbuf, STDSIZE + client->size); - - if (client->inbuf != NULL) - LIBCFS_FREE (client->inbuf, - (client->size + STDSIZE) * client->count); - - if (client != NULL) - LIBCFS_FREE (client, - sizeof(struct pingcli_data)); - } - - - CDEBUG (D_OTHER, "ping client released resources\n"); -} /* pingcli_shutdown() */ - -static void pingcli_callback(lnet_event_t *ev) -{ - int i; - unsigned magic; - i = __le32_to_cpu(*(int *)((char *)ev->md.start + ev->offset + sizeof(unsigned))); - magic = __le32_to_cpu(*(int *)((char *)ev->md.start + ev->offset)); - - if(magic != 0xcafebabe) { - CERROR("Unexpected response 
%x\n", magic); - } - - if((i == count) || !count) - wake_up_process (client->tsk); - else - CERROR("Received response after timeout for %d\n",i); -} - - -static void -pingcli_start(struct libcfs_ioctl_data *args) -{ - unsigned ping_head_magic = __cpu_to_le32(PING_HEADER_MAGIC); - int rc; - struct timeval tv1, tv2; - - client->tsk = cfs_current(); - client->nid = args->ioc_nid; - client->count = args->ioc_count; - client->size = args->ioc_u32[0]; - client->timeout = args->ioc_u32[1]; - - CDEBUG (D_OTHER, "pingcli_setup args: nid %s (%s), \ - size %u, count: %u, timeout: %u\n", - libcfs_nid2str(client->nid), - libcfs_nid2str(client->nid), - client->size, client->count, client->timeout); - - - LIBCFS_ALLOC (client->outbuf, STDSIZE + client->size) ; - if (client->outbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return; - } - - LIBCFS_ALLOC (client->inbuf, - (client->size + STDSIZE) * client->count); - if (client->inbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return; - } - - rc = LNetNIInit(0); - if (rc != 0 && rc != 1) - { - CERROR ("LNetNIInit: error %d\n", rc); - pingcli_shutdown (4); - return; - } - - /* Based on the initialization aquire our unique portal ID. 
*/ - if ((rc = LNetGetId (1, &client->myid))) - { - CERROR ("LNetGetId error %d\n", rc); - pingcli_shutdown (2); - return; - } - - /* Setup the local match entries */ - client->id_local.nid = LNET_NID_ANY; - client->id_local.pid = LNET_PID_ANY; - - /* Setup the remote match entries */ - client->id_remote.nid = client->nid; - client->id_remote.pid = 0; - - if ((rc = LNetMEAttach (PTL_PING_CLIENT, - client->id_local, 0, ~0, LNET_RETAIN, - LNET_INS_AFTER, &client->me))) - { - CERROR ("LNetMEAttach error %d\n", rc); - pingcli_shutdown (2); - return; - } - - /* Allocate the event queue for this network interface */ - if ((rc = LNetEQAlloc (64, pingcli_callback, &client->eq))) - { - CERROR ("LNetEQAlloc error %d\n", rc); - pingcli_shutdown (2); - return; - } - - count = client->count; - - client->md_in_head.start = client->inbuf; - client->md_in_head.length = (client->size + STDSIZE) * count; - client->md_in_head.threshold = LNET_MD_THRESH_INF; - client->md_in_head.options = LNET_MD_OP_PUT; - client->md_in_head.user_ptr = NULL; - client->md_in_head.eq_handle = client->eq; - memset (client->inbuf, 0, (client->size + STDSIZE) * count); - - /* Attach the incoming buffer */ - if ((rc = LNetMDAttach (client->me, client->md_in_head, - LNET_UNLINK, &client->md_in_head_h))) { - CERROR ("LNetMDAttach error %d\n", rc); - pingcli_shutdown (1); - return; - } - /* Setup the outgoing ping header */ - client->md_out_head.start = client->outbuf; - client->md_out_head.length = STDSIZE + client->size; - client->md_out_head.threshold = client->count; - client->md_out_head.options = LNET_MD_OP_PUT; - client->md_out_head.user_ptr = NULL; - client->md_out_head.eq_handle = LNET_EQ_NONE; - - memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic)); - - count = 0; - - /* Bind the outgoing ping header */ - if ((rc=LNetMDBind (client->md_out_head, - LNET_UNLINK, &client->md_out_head_h))) { - CERROR ("LNetMDBind error %d\n", rc); - pingcli_shutdown (1); - return; - } - while 
((client->count - count)) { - unsigned __count; - __count = __cpu_to_le32(count); - - memcpy (client->outbuf + sizeof(unsigned), - &(__count), sizeof(unsigned)); - /* Put the ping packet */ - cfs_fs_timeval (&tv1); - - memcpy(client->outbuf+sizeof(unsigned)+sizeof(unsigned),&tv1, - sizeof(struct timeval)); - - if((rc = LNetPut (LNET_NID_ANY, client->md_out_head_h, - LNET_NOACK_REQ, - client->id_remote, PTL_PING_SERVER, - 0, 0, 0))) { - PDEBUG ("LNetPut (header)", rc); - pingcli_shutdown (1); - return; - } - CWARN ("Lustre: sent msg no %d.\n", count); - - set_current_state (CFS_TASK_INTERRUPTIBLE); - rc = cfs_schedule_timeout (CFS_TASK_INTERRUPTIBLE, - cfs_time_seconds(client->timeout)); - if (rc == 0) { - CERROR ("timeout .....\n"); - } else { - cfs_fs_timeval (&tv2); - CWARN("Reply in %u usec\n", - (unsigned)((tv2.tv_sec - tv1.tv_sec) - * 1000000 + (tv2.tv_usec - tv1.tv_usec))); - } - count++; - } - - pingcli_shutdown (2); - -} /* pingcli_setup() */ - - - -/* called by the portals_ioctl for ping requests */ -int kping_client(struct libcfs_ioctl_data *args) -{ - LIBCFS_ALLOC (client, sizeof(struct pingcli_data)); - if (client == NULL) - { - CERROR ("Unable to allocate client structure\n"); - return (0); - } - memset (client, 0, sizeof(struct pingcli_data)); - pingcli_start (args); - - return 0; -} /* kping_client() */ - - -static int __init pingcli_init(void) -{ - PORTAL_SYMBOL_REGISTER(kping_client); - return 0; -} /* pingcli_init() */ - - -static void /*__exit*/ pingcli_cleanup(void) -{ - PORTAL_SYMBOL_UNREGISTER (kping_client); -} /* pingcli_cleanup() */ - - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A simple kernel space ping client for portals testing"); -MODULE_LICENSE("GPL"); - -cfs_module(ping_cli, "1.0.0", pingcli_init, pingcli_cleanup); - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -EXPORT_SYMBOL (kping_client); -#endif diff --git a/lnet/tests/ping_cli/Info.plist b/lnet/tests/ping_cli/Info.plist deleted file mode 100644 index 
4ecee0fb28e10e6b7be59f33f1fd22a02a3519c6..0000000000000000000000000000000000000000 --- a/lnet/tests/ping_cli/Info.plist +++ /dev/null @@ -1,39 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>English</string> - <key>CFBundleExecutable</key> - <string>pingcli</string> - <key>CFBundleIconFile</key> - <string></string> - <key>CFBundleIdentifier</key> - <string>com.clusterfs.lustre.pingcli</string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - <key>CFBundlePackageType</key> - <string>KEXT</string> - <key>CFBundleSignature</key> - <string>????</string> - <key>CFBundleVersion</key> - <string>1.0.1</string> - <key>OSBundleCompatibleVersion</key> - <string>1.0.0</string> - <key>OSBundleLibraries</key> - <dict> - <key>com.apple.kpi.bsd</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.libkern</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.mach</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.unsupported</key> - <string>8.0.0b1</string> - <key>com.clusterfs.lustre.libcfs</key> - <string>1.0.0</string> - <key>com.clusterfs.lustre.lnet</key> - <string>1.0.0</string> - </dict> -</dict> -</plist> diff --git a/lnet/tests/ping_cli/ping_cli.xcode/project.pbxproj b/lnet/tests/ping_cli/ping_cli.xcode/project.pbxproj deleted file mode 100644 index 255220dfe9afc576dfc552de631ce71e470803b6..0000000000000000000000000000000000000000 --- a/lnet/tests/ping_cli/ping_cli.xcode/project.pbxproj +++ /dev/null @@ -1,255 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = ping_cli; - refType = 4; - sourceTree = "<group>"; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = "<group>"; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 1949BA72073A08F100E4167C = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = ping_cli.c; - path = ../ping_cli.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 1949BA73073A08F100E4167C = { - fileRef = 1949BA72073A08F100E4167C; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = "<group>"; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 
-//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 1949BA72073A08F100E4167C, - ); - isa = PBXGroup; - name = Source; - path = ""; - refType = 4; - sourceTree = "<group>"; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, - 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = "../../include ../"; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.tests.ping_cli; - MODULE_START = ping_cli_start; - MODULE_STOP = ping_cli_stop; - MODULE_VERSION = 1.0.0d1; - OTHER_CFLAGS = "-D__KERNEL__ -D__DARWIN__"; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = ping_cli; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = ping_cli; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = ping_cli; - productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 1949BA73073A08F100E4167C, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = "<group>"; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = ping_cli.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/tests/ping_cli/winnt-pingcli.c b/lnet/tests/ping_cli/winnt-pingcli.c deleted file mode 100644 index 7c9a1a1958bef7844fa78b98262c18466942adec..0000000000000000000000000000000000000000 --- a/lnet/tests/ping_cli/winnt-pingcli.c +++ /dev/null @@ -1,634 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Matt Wu <mattwu@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -/* - * Included Headers - */ - - -#include <libcfs/libcfs.h> - - -/* libcfs module init/exit routines */ -DECLARE_INIT(init_libcfs_module); -DECLARE_EXIT(exit_libcfs_module); - -/* portal module init/exit routines */ -DECLARE_INIT(init_lnet); -DECLARE_EXIT(fini_lnet); - -/* tdinal module init/exit routines */ -DECLARE_INIT(ksocknal_module_init); -DECLARE_EXIT(ksocknal_module_fini); - -/* pingcli module init/exit routines */ -DECLARE_INIT(pingcli_init); -DECLARE_EXIT(pingcli_cleanup); - - -/* pingsrv module init/exit routines */ -DECLARE_INIT(pingsrv_init); -DECLARE_EXIT(pingsrv_cleanup); - -/* - * structure definitions - */ - - -#define LUSTRE_PING_VERSION 0x00010000 /* ping srv/cli version: 0001.0000 */ - -#define LUSTRE_PING_DEVICE L"\\Device\\LNET" /* device object name */ -#define LUSTRE_PING_SYMLNK L"\\DosDevices\\LNET" /* user-visible name for the device*/ - -typedef struct _DEVICE_EXTENSION -{ - BOOLEAN bProcFS; - -} DEVICE_EXTENSION, *PDEVICE_EXTENSION; - - -/* - * global definitions - */ - -PDEVICE_OBJECT PingObject = NULL; /* ping device object */ -PDEVICE_OBJECT ProcObject = NULL; /* procfs emulator device */ - - -/* - * common routines - */ - - -// -// complete Irp request ... -// - -NTSTATUS -UTCompleteIrp( - PIRP Irp, - NTSTATUS Status, - ULONG Info - ) -{ - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = Info; - IoCompleteRequest(Irp,IO_NO_INCREMENT); - - return Status; -} - -// -// Open/Create Device ... -// - -NTSTATUS -UTCreate( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - KdPrint(("UTCreate: DeviceCreate ...\n")); - - return UTCompleteIrp(Irp,STATUS_SUCCESS,0); -} - -// -// Close Devcie ... 
-// - -NTSTATUS -UTClose( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp) -{ - KdPrint(("UTClose: Device Closed.\n")); - - return UTCompleteIrp(Irp, STATUS_SUCCESS, 0); - - UNREFERENCED_PARAMETER(DeviceObject); -} - - - -NTSTATUS -UTShutdown( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - KdPrint(("UTShutdown: shuting TdiSock ...\n")); - - return UTCompleteIrp(Irp, STATUS_SUCCESS, 0); - - UNREFERENCED_PARAMETER(DeviceObject); -} - -// -// driver frame Routines ... -// - - -NTSTATUS -UTDeviceControl( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - NTSTATUS Status = STATUS_INVALID_DEVICE_REQUEST; - PIO_STACK_LOCATION IrpSp; - - ULONG ControlCode; - ULONG InputLength; - ULONG OutputLength; - - PVOID lpvInBuffer; - - KdPrint(("UTDeviceControl: Device Ioctl ...\n")); - - Irp->IoStatus.Information = 0; - IrpSp = IoGetCurrentIrpStackLocation(Irp); - - ControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode; - InputLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength; - OutputLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength; - lpvInBuffer = Irp->AssociatedIrp.SystemBuffer; - - ASSERT (IrpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL); - - switch (ControlCode) - { - case IOCTL_LIBCFS_VERSION: - - *((ULONG *)lpvInBuffer) = (ULONG)(LUSTRE_PING_VERSION); - Irp->IoStatus.Information = sizeof(ULONG); - Status = STATUS_SUCCESS; - break; - - default: - break; - } - - Irp->IoStatus.Status = Status; - - IoCompleteRequest(Irp, IO_NO_INCREMENT); - - KdPrint(("UTDeviceControl: Device Ioctl returned.\n")); - - return Status; -} - -NTSTATUS -ProcCreate( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - NTSTATUS Status; - PIO_STACK_LOCATION IrpSp; - - FILE_FULL_EA_INFORMATION * ea; - cfs_file_t * fp; - - KdPrint(("ProcCreate: Proc device is being opened ...\n")); - - IrpSp = IoGetCurrentIrpStackLocation(Irp); - ea = (PFILE_FULL_EA_INFORMATION) Irp->AssociatedIrp.SystemBuffer; - - if (!ea) { - Status = STATUS_INVALID_PARAMETER; - } else { 
- fp = lustre_open_file(&ea->EaName[0]); - if (!fp) { - Status = STATUS_OBJECT_NAME_NOT_FOUND; - } else { - IrpSp->FileObject->FsContext = fp; - IrpSp->FileObject->FsContext2 = fp->private_data; - Status = STATUS_SUCCESS; - } - } - - return UTCompleteIrp(Irp, Status, 0); -} - -// -// Close Devcie ... -// - -NTSTATUS -ProcClose( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp) -{ - PIO_STACK_LOCATION IrpSp; - - cfs_file_t * fp; - - KdPrint(("ProcClose: Proc device object is to be closed.\n")); - - IrpSp = IoGetCurrentIrpStackLocation(Irp); - - fp = (cfs_file_t *) IrpSp->FileObject->FsContext; - - ASSERT(fp != NULL); - ASSERT(IrpSp->FileObject->FsContext2 == fp->private_data); - - lustre_close_file(fp); - - return UTCompleteIrp(Irp, STATUS_SUCCESS, 0); - - UNREFERENCED_PARAMETER(DeviceObject); -} - -/* - * proc frame routines - */ - -NTSTATUS -ProcDeviceControl( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - NTSTATUS Status = STATUS_INVALID_DEVICE_REQUEST; - PIO_STACK_LOCATION IrpSp; - - ULONG ControlCode; - ULONG InputLength; - ULONG OutputLength; - - PVOID lpvInBuffer; - - KdPrint(("ProcDeviceControl: Proc device ioctling ...\n")); - - Irp->IoStatus.Information = 0; - IrpSp = IoGetCurrentIrpStackLocation(Irp); - - ControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode; - InputLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength; - OutputLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength; - lpvInBuffer = Irp->AssociatedIrp.SystemBuffer; - - ASSERT (IrpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL); - - switch (ControlCode) - { - case IOCTL_LIBCFS_VERSION: - - *((ULONG *)lpvInBuffer) = (ULONG)(LUSTRE_PING_VERSION); - Irp->IoStatus.Information = sizeof(ULONG); - - Status = STATUS_SUCCESS; - - break; - - case IOCTL_LIBCFS_ENTRY: - { - int rc = 0; - cfs_file_t * fp; - - fp = (cfs_file_t *) IrpSp->FileObject->FsContext; - - if (!fp) { - rc = -EINVAL; - } else { - rc = lustre_ioctl_file(fp, (PCFS_PROC_IOCTL) (lpvInBuffer)); - } - - if (rc 
== 0) { - Irp->IoStatus.Information = InputLength; - Status = STATUS_SUCCESS; - } - } - } - - Irp->IoStatus.Status = Status; - - IoCompleteRequest(Irp, IO_NO_INCREMENT); - - KdPrint(("ProcDeviceControl: Proc device ioctl returned with status = %xh.\n", Status)); - - return Status; -} - - - -NTSTATUS -ProcReadWrite (PDEVICE_OBJECT DeviceObject, PIRP Irp) -{ - PIO_STACK_LOCATION IrpSp; - NTSTATUS Status; - - cfs_file_t * fp; - int rc; - PCHAR buf; - - IrpSp = IoGetCurrentIrpStackLocation(Irp); - if (Irp->MdlAddress) { - buf = MmGetSystemAddressForMdlSafe( - Irp->MdlAddress, - NormalPagePriority); - } else { - buf = Irp->AssociatedIrp.SystemBuffer; - } - - if (buf == NULL) { - Status = STATUS_SUCCESS; - rc = 0; - } else { - fp = (cfs_file_t *) IrpSp->FileObject->FsContext; - - if (!fp) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - if (IrpSp->MajorFunction == IRP_MJ_READ) { - rc = lustre_read_file( - fp, IrpSp->Parameters.Read.ByteOffset.LowPart, - IrpSp->Parameters.Read.Length, buf); - } else { - rc = lustre_write_file( - fp, IrpSp->Parameters.Write.ByteOffset.LowPart, - IrpSp->Parameters.Write.Length, buf); - } - if (rc < 0) { - cfs_enter_debugger(); - Status = STATUS_UNSUCCESSFUL; - } else { - Status = STATUS_SUCCESS; - } - } - - -errorout: - return UTCompleteIrp(Irp, Status, rc); -} - - -// -// common dispatch routines -// - -NTSTATUS -UTDispatchRequest( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - NTSTATUS Status; - PIO_STACK_LOCATION IrpSp; - - Status = STATUS_INVALID_DEVICE_REQUEST; - - __try { - - IrpSp = IoGetCurrentIrpStackLocation(Irp); - - switch (IrpSp->MajorFunction) { - - case IRP_MJ_CREATE: - if (DeviceObject == PingObject) { - Status = UTCreate(DeviceObject, Irp); - } else if (DeviceObject == ProcObject) { - Status = ProcCreate(DeviceObject, Irp); - } - break; - - case IRP_MJ_CLOSE: - if (DeviceObject == PingObject) { - Status = UTClose(DeviceObject, Irp); - } else if (DeviceObject == ProcObject) { - Status = 
ProcClose(DeviceObject, Irp); - } - break; - - case IRP_MJ_READ: - case IRP_MJ_WRITE: - if (DeviceObject == ProcObject) { - Status = ProcReadWrite(DeviceObject, Irp); - } - break; - - case IRP_MJ_DEVICE_CONTROL: - if (DeviceObject == PingObject) { - Status = UTDeviceControl(DeviceObject, Irp); - } else if (DeviceObject == ProcObject) { - Status = ProcDeviceControl(DeviceObject, Irp); - } - break; - - case IRP_MJ_SHUTDOWN: - Status = UTShutdown(DeviceObject, Irp); - break; - - default: - - KdPrint(("UTDispatchRequest: Major Function: %xh is not supported.\n", - IrpSp->MajorFunction)); - UTCompleteIrp(Irp, Status, 0); - break; - } - } - - __finally { - } - - return Status; -} - -// -// create a device object and a dosdevice symbol link -// - -PDEVICE_OBJECT -CreateDevice( - IN PDRIVER_OBJECT DriverObject, - IN PWCHAR DeviceName, - IN PWCHAR SymlnkName, - IN BOOLEAN bProcFS - ) -{ - NTSTATUS Status; - - UNICODE_STRING NtDevName; - UNICODE_STRING Win32DevName; - - PDEVICE_EXTENSION DeviceExtension; - PDEVICE_OBJECT DeviceObject; - - /* create the device object with the specified name */ - - RtlInitUnicodeString(&NtDevName, DeviceName); - - Status = IoCreateDevice( - DriverObject, - sizeof(DEVICE_EXTENSION), - &NtDevName, - FILE_DEVICE_UNKNOWN, - 0, - FALSE, - &DeviceObject ); - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - return NULL; - } - - /* create the symlink to make the device visible to user */ - - RtlInitUnicodeString(&Win32DevName, SymlnkName); - - Status = IoCreateSymbolicLink(&Win32DevName, &NtDevName); - - if (!NT_SUCCESS(Status)) { - - IoDeleteDevice(DeviceObject); - return NULL; - } - - DeviceExtension = (PDEVICE_EXTENSION)DeviceObject->DeviceObjectExtension; - DeviceExtension->bProcFS = bProcFS; - - DeviceObject->Flags |= DO_BUFFERED_IO; - DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING; - - return DeviceObject; -} - - -// -// DriverEntry -// - -NTSTATUS DriverEntry( - IN PDRIVER_OBJECT DriverObject, - IN PUNICODE_STRING RegistryPath - ) -{ - 
KdPrint(("Lustre ping test: Build Time: " __DATE__ " " __TIME__ "\n")); - KdPrint(("Lustre ping test: DriverEntry ... \n")); - - /* initialize libcfs module */ - if (module_init_libcfs_module() != 0) { - KdPrint(("ping: error initialize module: libcfs ...\n")); - goto errorout; - } - - /* initialize lnet module */ - if (module_init_lnet() != 0) { - module_exit_libcfs_module(); - KdPrint(("ping: error initialize module: lnet ...\n")); - goto errorout; - } - - /* initialize tdinal module */ - if (module_ksocknal_module_init() != 0) { - module_fini_lnet(); - module_exit_libcfs_module(); - KdPrint(("ping: error initialize module: tdilnd ...\n")); - goto errorout; - } - -#if defined(LUSTRE_PING_CLI) - /* initialize pingcli module */ - if (module_pingcli_init() != 0) { - module_ksocknal_module_fini(); - module_fini_lnet(); - module_exit_libcfs_module(); - KdPrint(("ping: error initialize module: pingcli ...\n")); - goto errorout; - } -#endif - -#if defined(LUSTRE_PING_SRV) - /* initialize pingsrv module */ - if (module_pingsrv_init() != 0) { - module_ksocknal_module_fini(); - module_fini_lnet(); - module_exit_libcfs_module(); - KdPrint(("ping: error initialize module: pingsrv ...\n")); - goto errorout; - } -#endif - - /* create the ping device object */ - PingObject = CreateDevice( - DriverObject, - LUSTRE_PING_DEVICE, - LUSTRE_PING_SYMLNK, - FALSE ); - if (!PingObject) { -#if defined(LUSTRE_PING_CLI) - module_pingcli_cleanup(); -#endif -#if defined(LUSTRE_PING_SRV) - module_pingsrv_cleanup(); -#endif - module_ksocknal_module_fini(); - module_fini_lnet(); - module_exit_libcfs_module(); - - return STATUS_INSUFFICIENT_RESOURCES; - } - - /* create the libcfs proc fs emultor device object */ - ProcObject = CreateDevice( - DriverObject, - LUSTRE_PROC_DEVICE, - LUSTRE_PROC_SYMLNK, - TRUE ); - if (!ProcObject) { - - IoDeleteDevice(PingObject); -#if defined(LUSTRE_PING_CLI) - module_pingcli_cleanup(); -#endif -#if defined(LUSTRE_PING_SRV) - module_pingsrv_cleanup(); -#endif - 
module_ksocknal_module_fini(); - module_fini_lnet(); - module_exit_libcfs_module(); - return STATUS_INSUFFICIENT_RESOURCES; - } - - /* initialize the driver callback routines */ - - DriverObject->MajorFunction[IRP_MJ_CREATE] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_CLOSE] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_READ] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_WRITE] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_SHUTDOWN] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = UTDispatchRequest; - - return STATUS_SUCCESS; - -errorout: - - cfs_enter_debugger(); - - return STATUS_UNSUCCESSFUL; -} diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c deleted file mode 100644 index 22eefbfec7eca1c8586bdd473d1c216cc6ede6a9..0000000000000000000000000000000000000000 --- a/lnet/tests/ping_srv.c +++ /dev/null @@ -1,291 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf <behlendorf1@llnl.gov> - * Amey Inamdar <amey@calsoftinc.com> - * Kedar Sovani <kedar@calsoftinc.com> - * - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_PINGER - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> -#include "ping.h" - -#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) -#define MAXSIZE (16*1024) - -static unsigned ping_head_magic; -static unsigned ping_bulk_magic; -static unsigned long packets_valid = 0; // Valid packets -static int running = 1; -atomic_t pkt; - -static struct pingsrv_data *server=NULL; // Our ping server - -static void *pingsrv_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. - */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = LNetMDUnlink (server->mdin_h))) - PDEBUG ("LNetMDUnlink (out head buffer)", rc); - case 2: - /* Free the event queue */ - if ((rc = LNetEQFree (server->eq))) - PDEBUG ("LNetEQFree", rc); - - /* Unlink the client portal from the ME list */ - if ((rc = LNetMEUnlink (server->me))) - PDEBUG ("LNetMEUnlink", rc); - - case 3: - LNetNIFini (); - - case 4: - - case 5: - if (server->in_buf != NULL) - LIBCFS_FREE (server->in_buf, MAXSIZE); - - if (server != NULL) - LIBCFS_FREE (server, - sizeof (struct pingsrv_data)); - - } - - CDEBUG (D_OTHER, "ping sever resources released\n"); - return NULL; -} /* pingsrv_shutdown() */ - - -int pingsrv_thread(void *arg) -{ - int rc; - unsigned long magic; - unsigned long ping_bulk_magic = __cpu_to_le32(0xcafebabe); - - cfs_daemonize ("pingsrv"); - server->tsk = cfs_current(); - - while (running) { - set_current_state (CFS_TASK_INTERRUPTIBLE); - if (atomic_read (&pkt) == 0) { - cfs_schedule_timeout (CFS_TASK_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - continue; - } - - magic = __le32_to_cpu(*((int *)((char *)server->evnt.md.start - + server->evnt.offset))); - - - if(magic != 0xdeadbeef) { - CERROR("Unexpected Packet to the server, magic: %lx %d\n", magic, server->evnt.offset); - - } - memcpy 
(server->in_buf, &ping_bulk_magic, sizeof(ping_bulk_magic)); - - server->mdout.length = server->evnt.rlength; - server->mdout.start = server->in_buf; - server->mdout.threshold = 1; - server->mdout.options = LNET_MD_OP_PUT; - server->mdout.user_ptr = NULL; - server->mdout.eq_handle = LNET_EQ_NONE; - - /* Bind the outgoing buffer */ - if ((rc = LNetMDBind (server->mdout, - LNET_UNLINK, &server->mdout_h))) { - PDEBUG ("LNetMDBind", rc); - pingsrv_shutdown (1); - return 1; - } - - - server->mdin.start = server->in_buf; - server->mdin.length = MAXSIZE; - server->mdin.threshold = 1; - server->mdin.options = LNET_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eq_handle = server->eq; - - if ((rc = LNetMDAttach (server->me, server->mdin, - LNET_UNLINK, &server->mdin_h))) { - PDEBUG ("LNetMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - if ((rc = LNetPut (server->evnt.target.nid, server->mdout_h, - LNET_NOACK_REQ, - server->evnt.initiator, PTL_PING_CLIENT, - 0, 0, 0))) - PDEBUG ("LNetPut", rc); - - atomic_dec (&pkt); - - } - pingsrv_shutdown (1); - running = 1; - return 0; -} - -static void pingsrv_packet(lnet_event_t *ev) -{ - atomic_inc (&pkt); - wake_up_process (server->tsk); -} /* pingsrv_head() */ - -static void pingsrv_callback(lnet_event_t *ev) -{ - - if (ev == NULL) { - CERROR ("null in callback, ev=%p\n", ev); - return; - } - server->evnt = *ev; - - CWARN ("received ping from nid %s " - "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n", - libcfs_nid2str(ev->initiator.nid), - ev->offset, ev->rlength, ev->mlength, - __le32_to_cpu(*((int *)((char *)ev->md.start + ev->offset))), - __le32_to_cpu(*((int *)((char *)ev->md.start + ev->offset + sizeof(unsigned)))), - __le32_to_cpu(*((int *)((char *)ev->md.start + ev->offset + 2 * - sizeof(unsigned))))); - - packets_valid++; - - pingsrv_packet(ev); - -} /* pingsrv_callback() */ - - -static struct pingsrv_data *pingsrv_setup(void) -{ - int rc; - - /* Aquire and initialize the 
proper nal for portals. */ - rc = LNetNIInit(0); - if (!(rc == 0 || rc == 1)) { - CDEBUG (D_OTHER, "LNetNIInit: error %d\n", rc); - return pingsrv_shutdown (4); - } - - - /* Based on the initialization aquire our unique portal ID. */ - if ((rc = LNetGetId (1, &server->my_id))) { - PDEBUG ("LNetGetId", rc); - return pingsrv_shutdown (2); - } - - server->id_local.nid = LNET_NID_ANY; - server->id_local.pid = LNET_PID_ANY; - - /* Attach a match entries for header packets */ - if ((rc = LNetMEAttach (PTL_PING_SERVER, - server->id_local,0, ~0, - LNET_RETAIN, LNET_INS_AFTER, &server->me))) { - PDEBUG ("LNetMEAttach", rc); - return pingsrv_shutdown (2); - } - - - if ((rc = LNetEQAlloc (1024, &pingsrv_callback, &server->eq))) { - PDEBUG ("LNetEQAlloc (callback)", rc); - return pingsrv_shutdown (2); - } - - LIBCFS_ALLOC (server->in_buf, MAXSIZE); - if(!server->in_buf){ - CDEBUG (D_OTHER,"Allocation error\n"); - return pingsrv_shutdown(2); - } - - /* Setup the incoming buffer */ - server->mdin.start = server->in_buf; - server->mdin.length = MAXSIZE; - server->mdin.threshold = 1; - server->mdin.options = LNET_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eq_handle = server->eq; - memset (server->in_buf, 0, STDSIZE); - - if ((rc = LNetMDAttach (server->me, server->mdin, - LNET_UNLINK, &server->mdin_h))) { - PDEBUG ("LNetMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - /* Success! 
*/ - return server; -} /* pingsrv_setup() */ - -static int pingsrv_start(void) -{ - long pid; - - /* Setup our server */ - if (!pingsrv_setup()) { - CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n"); - return -ENOMEM; - } - pid = cfs_kernel_thread (pingsrv_thread,NULL,0); - if (pid < 0) { - CERROR("Can't start pingsrv thread: rc = %ld\n", pid); - return (int)pid; - } - - return 0; -} /* pingsrv_start() */ - -static int __init pingsrv_init(void) -{ - ping_head_magic = __cpu_to_le32(PING_HEADER_MAGIC); - ping_bulk_magic = __cpu_to_le32(PING_BULK_MAGIC); - LIBCFS_ALLOC (server, sizeof(struct pingsrv_data)); - atomic_set(&pkt, 0); - return pingsrv_start (); -} /* pingsrv_init() */ - -static void /*__exit*/ pingsrv_cleanup(void) -{ - cfs_remove_proc_entry ("net/pingsrv", NULL); - - running = 0; - wake_up_process (server->tsk); - while (running != 1) { - set_current_state (CFS_TASK_UNINT); - cfs_schedule_timeout (CFS_TASK_UNINT, cfs_time_seconds(1)); - } - -} /* pingsrv_cleanup() */ - - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A kernel space ping server for portals testing"); -MODULE_LICENSE("GPL"); - -cfs_module(ping_srv, "1.0.0", pingsrv_init, pingsrv_cleanup); diff --git a/lnet/tests/ping_srv/Info.plist b/lnet/tests/ping_srv/Info.plist deleted file mode 100644 index b08212c9493728f67f9c3050ea64c80b1af97dca..0000000000000000000000000000000000000000 --- a/lnet/tests/ping_srv/Info.plist +++ /dev/null @@ -1,40 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> -<plist version="1.0"> -<dict> - <key>CFBundleDevelopmentRegion</key> - <string>English</string> - <key>CFBundleExecutable</key> - <string>pingsrv</string> - <key>CFBundleIconFile</key> - <string></string> - <key>CFBundleIdentifier</key> - <string>com.clusterfs.lustre.pingsrv</string> - <key>CFBundleInfoDictionaryVersion</key> - <string>6.0</string> - 
<key>CFBundlePackageType</key> - <string>KEXT</string> - <key>CFBundleSignature</key> - <string>????</string> - <key>CFBundleVersion</key> - <string>1.0.1</string> - <key>OSBundleCompatibleVersion</key> - <string>1.0.0</string> - <key>OSBundleLibraries</key> - <dict> - <key>com.apple.kpi.bsd</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.libkern</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.mach</key> - <string>8.0.0b1</string> - <key>com.apple.kpi.unsupported</key> - <string>8.0.0b1</string> - <key>com.clusterfs.lustre.libcfs</key> - <string>1.0.0</string> - <key>com.clusterfs.lustre.lnet</key> - <string>1.0.0</string> - </dict> -</dict> -</plist> - diff --git a/lnet/tests/ping_srv/ping_srv.xcode/project.pbxproj b/lnet/tests/ping_srv/ping_srv.xcode/project.pbxproj deleted file mode 100644 index 0173417778a79ae47aab52b3593daebeb4904f0e..0000000000000000000000000000000000000000 --- a/lnet/tests/ping_srv/ping_srv.xcode/project.pbxproj +++ /dev/null @@ -1,255 +0,0 @@ -// !$*UTF8*$! 
-{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = ping_srv; - refType = 4; - sourceTree = "<group>"; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = "<group>"; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 1987212D0739090900338926 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - name = ping_srv.c; - path = ../ping_srv.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 1987212E0739090900338926 = { - fileRef = 1987212D0739090900338926; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = "<group>"; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 
-//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 1987212D0739090900338926, - ); - isa = PBXGroup; - name = Source; - path = ""; - refType = 4; - sourceTree = "<group>"; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, - 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = "../../include ../"; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.tests.ping_srv; - MODULE_START = ping_srv_start; - MODULE_STOP = ping_srv_stop; - MODULE_VERSION = 1.0.0d1; - OTHER_CFLAGS = "-D__KERNEL__ -D__DARWIN__"; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = ping_srv; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = ping_srv; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = ping_srv; - productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 1987212E0739090900338926, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = "<group>"; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = ping_srv.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/tests/ping_srv/winnt-pingsrv.c b/lnet/tests/ping_srv/winnt-pingsrv.c deleted file mode 100644 index 7c9a1a1958bef7844fa78b98262c18466942adec..0000000000000000000000000000000000000000 --- a/lnet/tests/ping_srv/winnt-pingsrv.c +++ /dev/null @@ -1,634 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=4:tabstop=4: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Matt Wu <mattwu@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LNET - -/* - * Included Headers - */ - - -#include <libcfs/libcfs.h> - - -/* libcfs module init/exit routines */ -DECLARE_INIT(init_libcfs_module); -DECLARE_EXIT(exit_libcfs_module); - -/* portal module init/exit routines */ -DECLARE_INIT(init_lnet); -DECLARE_EXIT(fini_lnet); - -/* tdinal module init/exit routines */ -DECLARE_INIT(ksocknal_module_init); -DECLARE_EXIT(ksocknal_module_fini); - -/* pingcli module init/exit routines */ -DECLARE_INIT(pingcli_init); -DECLARE_EXIT(pingcli_cleanup); - - -/* pingsrv module init/exit routines */ -DECLARE_INIT(pingsrv_init); -DECLARE_EXIT(pingsrv_cleanup); - -/* - * structure definitions - */ - - -#define LUSTRE_PING_VERSION 0x00010000 /* ping srv/cli version: 0001.0000 */ - -#define LUSTRE_PING_DEVICE L"\\Device\\LNET" /* device object name */ -#define LUSTRE_PING_SYMLNK L"\\DosDevices\\LNET" /* user-visible name for the device*/ - -typedef struct _DEVICE_EXTENSION -{ - BOOLEAN bProcFS; - -} DEVICE_EXTENSION, *PDEVICE_EXTENSION; - - -/* - * global definitions - */ - -PDEVICE_OBJECT PingObject = NULL; /* ping device object */ -PDEVICE_OBJECT ProcObject = NULL; /* procfs emulator device */ - - -/* - * common routines - */ - - -// -// complete Irp request ... -// - -NTSTATUS -UTCompleteIrp( - PIRP Irp, - NTSTATUS Status, - ULONG Info - ) -{ - Irp->IoStatus.Status = Status; - Irp->IoStatus.Information = Info; - IoCompleteRequest(Irp,IO_NO_INCREMENT); - - return Status; -} - -// -// Open/Create Device ... -// - -NTSTATUS -UTCreate( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - KdPrint(("UTCreate: DeviceCreate ...\n")); - - return UTCompleteIrp(Irp,STATUS_SUCCESS,0); -} - -// -// Close Devcie ... 
-// - -NTSTATUS -UTClose( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp) -{ - KdPrint(("UTClose: Device Closed.\n")); - - return UTCompleteIrp(Irp, STATUS_SUCCESS, 0); - - UNREFERENCED_PARAMETER(DeviceObject); -} - - - -NTSTATUS -UTShutdown( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - KdPrint(("UTShutdown: shuting TdiSock ...\n")); - - return UTCompleteIrp(Irp, STATUS_SUCCESS, 0); - - UNREFERENCED_PARAMETER(DeviceObject); -} - -// -// driver frame Routines ... -// - - -NTSTATUS -UTDeviceControl( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - NTSTATUS Status = STATUS_INVALID_DEVICE_REQUEST; - PIO_STACK_LOCATION IrpSp; - - ULONG ControlCode; - ULONG InputLength; - ULONG OutputLength; - - PVOID lpvInBuffer; - - KdPrint(("UTDeviceControl: Device Ioctl ...\n")); - - Irp->IoStatus.Information = 0; - IrpSp = IoGetCurrentIrpStackLocation(Irp); - - ControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode; - InputLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength; - OutputLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength; - lpvInBuffer = Irp->AssociatedIrp.SystemBuffer; - - ASSERT (IrpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL); - - switch (ControlCode) - { - case IOCTL_LIBCFS_VERSION: - - *((ULONG *)lpvInBuffer) = (ULONG)(LUSTRE_PING_VERSION); - Irp->IoStatus.Information = sizeof(ULONG); - Status = STATUS_SUCCESS; - break; - - default: - break; - } - - Irp->IoStatus.Status = Status; - - IoCompleteRequest(Irp, IO_NO_INCREMENT); - - KdPrint(("UTDeviceControl: Device Ioctl returned.\n")); - - return Status; -} - -NTSTATUS -ProcCreate( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - NTSTATUS Status; - PIO_STACK_LOCATION IrpSp; - - FILE_FULL_EA_INFORMATION * ea; - cfs_file_t * fp; - - KdPrint(("ProcCreate: Proc device is being opened ...\n")); - - IrpSp = IoGetCurrentIrpStackLocation(Irp); - ea = (PFILE_FULL_EA_INFORMATION) Irp->AssociatedIrp.SystemBuffer; - - if (!ea) { - Status = STATUS_INVALID_PARAMETER; - } else { 
- fp = lustre_open_file(&ea->EaName[0]); - if (!fp) { - Status = STATUS_OBJECT_NAME_NOT_FOUND; - } else { - IrpSp->FileObject->FsContext = fp; - IrpSp->FileObject->FsContext2 = fp->private_data; - Status = STATUS_SUCCESS; - } - } - - return UTCompleteIrp(Irp, Status, 0); -} - -// -// Close Devcie ... -// - -NTSTATUS -ProcClose( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp) -{ - PIO_STACK_LOCATION IrpSp; - - cfs_file_t * fp; - - KdPrint(("ProcClose: Proc device object is to be closed.\n")); - - IrpSp = IoGetCurrentIrpStackLocation(Irp); - - fp = (cfs_file_t *) IrpSp->FileObject->FsContext; - - ASSERT(fp != NULL); - ASSERT(IrpSp->FileObject->FsContext2 == fp->private_data); - - lustre_close_file(fp); - - return UTCompleteIrp(Irp, STATUS_SUCCESS, 0); - - UNREFERENCED_PARAMETER(DeviceObject); -} - -/* - * proc frame routines - */ - -NTSTATUS -ProcDeviceControl( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - NTSTATUS Status = STATUS_INVALID_DEVICE_REQUEST; - PIO_STACK_LOCATION IrpSp; - - ULONG ControlCode; - ULONG InputLength; - ULONG OutputLength; - - PVOID lpvInBuffer; - - KdPrint(("ProcDeviceControl: Proc device ioctling ...\n")); - - Irp->IoStatus.Information = 0; - IrpSp = IoGetCurrentIrpStackLocation(Irp); - - ControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode; - InputLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength; - OutputLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength; - lpvInBuffer = Irp->AssociatedIrp.SystemBuffer; - - ASSERT (IrpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL); - - switch (ControlCode) - { - case IOCTL_LIBCFS_VERSION: - - *((ULONG *)lpvInBuffer) = (ULONG)(LUSTRE_PING_VERSION); - Irp->IoStatus.Information = sizeof(ULONG); - - Status = STATUS_SUCCESS; - - break; - - case IOCTL_LIBCFS_ENTRY: - { - int rc = 0; - cfs_file_t * fp; - - fp = (cfs_file_t *) IrpSp->FileObject->FsContext; - - if (!fp) { - rc = -EINVAL; - } else { - rc = lustre_ioctl_file(fp, (PCFS_PROC_IOCTL) (lpvInBuffer)); - } - - if (rc 
== 0) { - Irp->IoStatus.Information = InputLength; - Status = STATUS_SUCCESS; - } - } - } - - Irp->IoStatus.Status = Status; - - IoCompleteRequest(Irp, IO_NO_INCREMENT); - - KdPrint(("ProcDeviceControl: Proc device ioctl returned with status = %xh.\n", Status)); - - return Status; -} - - - -NTSTATUS -ProcReadWrite (PDEVICE_OBJECT DeviceObject, PIRP Irp) -{ - PIO_STACK_LOCATION IrpSp; - NTSTATUS Status; - - cfs_file_t * fp; - int rc; - PCHAR buf; - - IrpSp = IoGetCurrentIrpStackLocation(Irp); - if (Irp->MdlAddress) { - buf = MmGetSystemAddressForMdlSafe( - Irp->MdlAddress, - NormalPagePriority); - } else { - buf = Irp->AssociatedIrp.SystemBuffer; - } - - if (buf == NULL) { - Status = STATUS_SUCCESS; - rc = 0; - } else { - fp = (cfs_file_t *) IrpSp->FileObject->FsContext; - - if (!fp) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - - if (IrpSp->MajorFunction == IRP_MJ_READ) { - rc = lustre_read_file( - fp, IrpSp->Parameters.Read.ByteOffset.LowPart, - IrpSp->Parameters.Read.Length, buf); - } else { - rc = lustre_write_file( - fp, IrpSp->Parameters.Write.ByteOffset.LowPart, - IrpSp->Parameters.Write.Length, buf); - } - if (rc < 0) { - cfs_enter_debugger(); - Status = STATUS_UNSUCCESSFUL; - } else { - Status = STATUS_SUCCESS; - } - } - - -errorout: - return UTCompleteIrp(Irp, Status, rc); -} - - -// -// common dispatch routines -// - -NTSTATUS -UTDispatchRequest( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp - ) -{ - NTSTATUS Status; - PIO_STACK_LOCATION IrpSp; - - Status = STATUS_INVALID_DEVICE_REQUEST; - - __try { - - IrpSp = IoGetCurrentIrpStackLocation(Irp); - - switch (IrpSp->MajorFunction) { - - case IRP_MJ_CREATE: - if (DeviceObject == PingObject) { - Status = UTCreate(DeviceObject, Irp); - } else if (DeviceObject == ProcObject) { - Status = ProcCreate(DeviceObject, Irp); - } - break; - - case IRP_MJ_CLOSE: - if (DeviceObject == PingObject) { - Status = UTClose(DeviceObject, Irp); - } else if (DeviceObject == ProcObject) { - Status = 
ProcClose(DeviceObject, Irp); - } - break; - - case IRP_MJ_READ: - case IRP_MJ_WRITE: - if (DeviceObject == ProcObject) { - Status = ProcReadWrite(DeviceObject, Irp); - } - break; - - case IRP_MJ_DEVICE_CONTROL: - if (DeviceObject == PingObject) { - Status = UTDeviceControl(DeviceObject, Irp); - } else if (DeviceObject == ProcObject) { - Status = ProcDeviceControl(DeviceObject, Irp); - } - break; - - case IRP_MJ_SHUTDOWN: - Status = UTShutdown(DeviceObject, Irp); - break; - - default: - - KdPrint(("UTDispatchRequest: Major Function: %xh is not supported.\n", - IrpSp->MajorFunction)); - UTCompleteIrp(Irp, Status, 0); - break; - } - } - - __finally { - } - - return Status; -} - -// -// create a device object and a dosdevice symbol link -// - -PDEVICE_OBJECT -CreateDevice( - IN PDRIVER_OBJECT DriverObject, - IN PWCHAR DeviceName, - IN PWCHAR SymlnkName, - IN BOOLEAN bProcFS - ) -{ - NTSTATUS Status; - - UNICODE_STRING NtDevName; - UNICODE_STRING Win32DevName; - - PDEVICE_EXTENSION DeviceExtension; - PDEVICE_OBJECT DeviceObject; - - /* create the device object with the specified name */ - - RtlInitUnicodeString(&NtDevName, DeviceName); - - Status = IoCreateDevice( - DriverObject, - sizeof(DEVICE_EXTENSION), - &NtDevName, - FILE_DEVICE_UNKNOWN, - 0, - FALSE, - &DeviceObject ); - - if (!NT_SUCCESS(Status)) { - - cfs_enter_debugger(); - return NULL; - } - - /* create the symlink to make the device visible to user */ - - RtlInitUnicodeString(&Win32DevName, SymlnkName); - - Status = IoCreateSymbolicLink(&Win32DevName, &NtDevName); - - if (!NT_SUCCESS(Status)) { - - IoDeleteDevice(DeviceObject); - return NULL; - } - - DeviceExtension = (PDEVICE_EXTENSION)DeviceObject->DeviceObjectExtension; - DeviceExtension->bProcFS = bProcFS; - - DeviceObject->Flags |= DO_BUFFERED_IO; - DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING; - - return DeviceObject; -} - - -// -// DriverEntry -// - -NTSTATUS DriverEntry( - IN PDRIVER_OBJECT DriverObject, - IN PUNICODE_STRING RegistryPath - ) -{ - 
KdPrint(("Lustre ping test: Build Time: " __DATE__ " " __TIME__ "\n")); - KdPrint(("Lustre ping test: DriverEntry ... \n")); - - /* initialize libcfs module */ - if (module_init_libcfs_module() != 0) { - KdPrint(("ping: error initialize module: libcfs ...\n")); - goto errorout; - } - - /* initialize lnet module */ - if (module_init_lnet() != 0) { - module_exit_libcfs_module(); - KdPrint(("ping: error initialize module: lnet ...\n")); - goto errorout; - } - - /* initialize tdinal module */ - if (module_ksocknal_module_init() != 0) { - module_fini_lnet(); - module_exit_libcfs_module(); - KdPrint(("ping: error initialize module: tdilnd ...\n")); - goto errorout; - } - -#if defined(LUSTRE_PING_CLI) - /* initialize pingcli module */ - if (module_pingcli_init() != 0) { - module_ksocknal_module_fini(); - module_fini_lnet(); - module_exit_libcfs_module(); - KdPrint(("ping: error initialize module: pingcli ...\n")); - goto errorout; - } -#endif - -#if defined(LUSTRE_PING_SRV) - /* initialize pingsrv module */ - if (module_pingsrv_init() != 0) { - module_ksocknal_module_fini(); - module_fini_lnet(); - module_exit_libcfs_module(); - KdPrint(("ping: error initialize module: pingsrv ...\n")); - goto errorout; - } -#endif - - /* create the ping device object */ - PingObject = CreateDevice( - DriverObject, - LUSTRE_PING_DEVICE, - LUSTRE_PING_SYMLNK, - FALSE ); - if (!PingObject) { -#if defined(LUSTRE_PING_CLI) - module_pingcli_cleanup(); -#endif -#if defined(LUSTRE_PING_SRV) - module_pingsrv_cleanup(); -#endif - module_ksocknal_module_fini(); - module_fini_lnet(); - module_exit_libcfs_module(); - - return STATUS_INSUFFICIENT_RESOURCES; - } - - /* create the libcfs proc fs emultor device object */ - ProcObject = CreateDevice( - DriverObject, - LUSTRE_PROC_DEVICE, - LUSTRE_PROC_SYMLNK, - TRUE ); - if (!ProcObject) { - - IoDeleteDevice(PingObject); -#if defined(LUSTRE_PING_CLI) - module_pingcli_cleanup(); -#endif -#if defined(LUSTRE_PING_SRV) - module_pingsrv_cleanup(); -#endif - 
module_ksocknal_module_fini(); - module_fini_lnet(); - module_exit_libcfs_module(); - return STATUS_INSUFFICIENT_RESOURCES; - } - - /* initialize the driver callback routines */ - - DriverObject->MajorFunction[IRP_MJ_CREATE] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_CLOSE] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_READ] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_WRITE] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_SHUTDOWN] = UTDispatchRequest; - DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = UTDispatchRequest; - - return STATUS_SUCCESS; - -errorout: - - cfs_enter_debugger(); - - return STATUS_UNSUCCESSFUL; -} diff --git a/lnet/tests/startclient.sh b/lnet/tests/startclient.sh deleted file mode 100644 index 2a30a017a3388b4e028637d38e32c854d2158da8..0000000000000000000000000000000000000000 --- a/lnet/tests/startclient.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -case `uname -r` in - 2.6.*) ext=.ko;; - 2.4.*) ext=.o;; - *) echo unknown OS version; return 1;; -esac - -insmod pingcli$ext - diff --git a/lnet/tests/startserver.sh b/lnet/tests/startserver.sh deleted file mode 100644 index 355a8aec7c97c899e54a33c5f6c2ba22b97c2b82..0000000000000000000000000000000000000000 --- a/lnet/tests/startserver.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh - -case `uname -r` in - 2.6.*) ext=.ko;; - 2.4.*) ext=.o;; - *) echo unknown OS version; return 1;; -esac - -insmod pingsrv$ext diff --git a/lnet/tests/stopclient.sh b/lnet/tests/stopclient.sh deleted file mode 100644 index 276d37410cf094df097f30d477e1ec0d0d892ca8..0000000000000000000000000000000000000000 --- a/lnet/tests/stopclient.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -rmmod pingcli diff --git a/lnet/tests/stopserver.sh b/lnet/tests/stopserver.sh deleted file mode 100644 index 829afc6b47dbf831eed872957e2ebd7b6519e28c..0000000000000000000000000000000000000000 --- a/lnet/tests/stopserver.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -rmmod pingsrv diff --git 
a/lnet/tests/ut.README b/lnet/tests/ut.README deleted file mode 100644 index ef70b2fe6d9bfa19ea69e9a61a2d0dcd1c8a241a..0000000000000000000000000000000000000000 --- a/lnet/tests/ut.README +++ /dev/null @@ -1,43 +0,0 @@ -The utcli (unit test client) and utsrv (unit test server) are very simple -unit test tools, for sending and receiving single get's/put's of a specific -size, using the LNET API set. - -Test Setup -uml1 ip=192.168.2.1 -uml2 ip=192.168.2.2 - --------------------------------------------------------------------------------- -Example Test #1 - small get operation - -1) Setup server for listening -uml2 $ insmod utsvr.ko - -2) Do the get operation NID must be specified but all other are default -paramters which causes a 300 byte get op -uml1 $ insmod utcli.ko nid=192.168.2.2@tcp - -3) Unload the utsvr because currently it only supports a single operation -buffers are not reposted after they are consumed -*** FIX THIS LIMITATION *** -uml2 $ rmmod utsvr - --------------------------------------------------------------------------------- -Example Test #2 - small put operation -(The setup and cleanup of the server are left out, because they are the -same as above) - -1) The adition of the "put=1" paramter causes a put rather than a get. The -default size of 300 is still used. -uml1 $ insmod utcli.ko nid=192.168.2.2@tcp put=1 - --------------------------------------------------------------------------------- -Example Test #3 - large get operation - -1) Setup server for listening. The size must be specified on the server or else -the default of 300 bytes will be used. -uml2 $ insmod utsvr.ko pkt_size=5000 - -2) Do the large get operation pkt_size=5000. put=0 is a get operation, -it is equivlenet to just not having that parameter. 
-uml1 $ insmod utcli.ko nid=192.168.2.2@tcp put=0 pkt_size=5000 - diff --git a/lnet/tests/ut.h b/lnet/tests/ut.h deleted file mode 100644 index 96ccb344c1b9289b02659e3bd421b6c3cbb8fadb..0000000000000000000000000000000000000000 --- a/lnet/tests/ut.h +++ /dev/null @@ -1,45 +0,0 @@ -#define DEBUG_SUBSYSTEM S_PINGER - -#include <libcfs/kp30.h> -#include <lnet/lnet.h> - -#define UT_PORTAL 42 - -#define PJK_UT_MSG(fmt...) do{printk("<1>" UT_MSG_MODULE_NAME ":%-30s:",__FUNCTION__);printk(fmt);}while(0) - -#define DO_TYPE(x) case x: return #x; - -const char *get_ev_type_string(int evtype) -{ - switch(evtype) - { - DO_TYPE(LNET_EVENT_GET); - DO_TYPE(LNET_EVENT_PUT); - DO_TYPE(LNET_EVENT_REPLY); - DO_TYPE(LNET_EVENT_ACK); - DO_TYPE(LNET_EVENT_SEND); - DO_TYPE(LNET_EVENT_UNLINK); - default: - return ""; - } -} - -static volatile int seen = 0; -static volatile int seen_unlink = 0; - -static inline void handler(lnet_event_t *ev) -{ - PJK_UT_MSG("-------- EVENT START ------------\n"); - PJK_UT_MSG("type=%d %s\n",ev->type,get_ev_type_string(ev->type)); - PJK_UT_MSG("portal=%d\n",ev->pt_index); - PJK_UT_MSG("matchbits="LPX64"\n",ev->match_bits); - PJK_UT_MSG("request length=%d\n",ev->rlength); - PJK_UT_MSG("manipulated length=%d\n",ev->mlength); - PJK_UT_MSG("offset=%d\n",ev->offset); - PJK_UT_MSG("status=%d\n",ev->status); - PJK_UT_MSG("unlinked=%d\n",ev->unlinked); - PJK_UT_MSG("md.user_ptr=%p\n",ev->md.user_ptr); - PJK_UT_MSG("-------- EVENT END --------------\n"); - ++seen; - if(ev->unlinked)++seen_unlink; -} diff --git a/lnet/tests/ut_cli.c b/lnet/tests/ut_cli.c deleted file mode 100644 index 3a6e255151e6790847961e33cf0da612ca1f13e0..0000000000000000000000000000000000000000 --- a/lnet/tests/ut_cli.c +++ /dev/null @@ -1,211 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - - #define UT_MSG_MODULE_NAME "utcli " - #include "ut.h" - -int pkt_size = 300; -module_param(pkt_size,int,S_IRUGO); -int get=0; 
-module_param(get,int,S_IRUGO); -int put=0; -module_param(put,int,S_IRUGO); -int auto_unlink=1; -module_param(auto_unlink,int,S_IRUGO); -char* nid=0; -module_param(nid,charp,S_IRUGO); - -static int __init utcli_init(void) -{ - lnet_handle_md_t mdh; - lnet_process_id_t target; - lnet_process_id_t mypid; - lnet_handle_eq_t eqh; - lnet_md_t md; - int rc,i; - char* buffer = 0; - /* - * Put and get really control the same thing - */ - if(put)get=0; - /* Default to get */ - if(!put && !get)get=1; - - PJK_UT_MSG("utcli_init %s\n",get==0?"PUT":"GET"); - PJK_UT_MSG("pkt_size=%d\n",pkt_size); - PJK_UT_MSG("auto_unlink=%d\n",auto_unlink); - PJK_UT_MSG("nid=%s\n",nid); - if(nid == 0) - { - CERROR("NID Must be specified\n"); - return -EINVAL; - } - - PJK_UT_MSG("LIBCFS_ALLOC\n"); - LIBCFS_ALLOC (buffer, pkt_size); - if (buffer == NULL) - { - CERROR ("Unable to allocate out_buf (%d bytes)\n", pkt_size); - return -ENOMEM; - } - - PJK_UT_MSG("LNetNiInit()\n"); - rc = LNetNIInit(0); - if (rc < 0) - { - CERROR ("LNetNIInit: error %d\n", rc); - goto exit0; - } - - - LNetGetId(0,&mypid); - PJK_UT_MSG("my.nid="LPX64"\n",mypid.nid); - PJK_UT_MSG("my.pid=0x%x\n",mypid.pid); - - - PJK_UT_MSG("LNetEQAlloc\n"); - rc = LNetEQAlloc( - 64, /* max number of envents why 64? */ - handler, /* handler callback */ - &eqh); /* output handle */ - if(rc != 0) { - CERROR("LNetEQAlloc failed %d\n",rc); - goto exit1; - } - - md.start = buffer; - md.length = pkt_size; - md.threshold = auto_unlink ? (get ? 
2 : 1) : 15; - md.max_size = 0; - md.options = 0; - if(get){ - md.options |= LNET_MD_OP_GET; - }else{ - md.options |= LNET_MD_OP_PUT; - md.options |= LNET_MD_ACK_DISABLE; - } - md.user_ptr = 0; - md.eq_handle = eqh; - - PJK_UT_MSG("LNetMDBind()\n"); - if ((rc=LNetMDBind ( - md, - LNET_UNLINK, - &mdh))) /* out handle */ - { - CERROR ("LNetMDBind error %d\n", rc); - goto exit4; - } - - target.pid = 0; - target.nid = libcfs_str2nid(nid); - - PJK_UT_MSG("target.nid="LPX64"\n",target.nid); - - for(i=0;i<1;i++) - { - if(get){ - PJK_UT_MSG("LNetGet()\n"); - if((rc = LNetGet ( - LNET_ID_ANY, - mdh, - target, /* peer "address" */ - UT_PORTAL, /* portal */ - i, /* match bits */ - 0))) /* header data */ - { - CERROR("LNetGet %d error %d\n",i, rc); - goto exit5; - } - }else{ - - PJK_UT_MSG("LNetPut()\n"); - if((rc = LNetPut ( - LNET_ID_ANY, - mdh, - LNET_ACK_REQ, /* we want ack */ - target, /* peer "address" */ - UT_PORTAL, /* portal */ - i, /* match bits */ - 0, /* offset */ - 0))) /* header data */ - { - CERROR("LNetPut %d error %d\n",i, rc); - goto exit5; - } - } - } - - - PJK_UT_MSG("------------Waiting for SEND_END()------------\n"); - i=0; - while(i++ < 10 && seen == 0) - cfs_pause(cfs_time_seconds(1)); - if(seen == 0) - PJK_UT_MSG("------------------TIMEDOUT--------------------\n"); - else{ - int good; - if(get){ - PJK_UT_MSG("------------Waiting for REPLY()------------\n"); - i=0; - while(i++ < 10 && seen == 1) - cfs_pause(cfs_time_seconds(1)); - good = (seen != 1); - }else{ - good = 1; - } - - if(good) - PJK_UT_MSG("------------------COMPLETE--------------------\n"); - else - PJK_UT_MSG("------------------TIMEDOUT--------------------\n"); - } - - - - /* - PJK_UT_MSG("LNetEQWait()\n"); - rc = LNetEQWait(eqh,&ev); - if(rc != 0) - goto exit5; - */ - -exit5: - PJK_UT_MSG("LNetMDUnlink()\n"); - LNetMDUnlink(mdh); - - if(!seen_unlink){ - PJK_UT_MSG("------------Waiting for UNLINK ------------\n"); - i=0; - while(i++ < 120 && seen_unlink == 0) - 
cfs_pause(cfs_time_seconds(1)); - } - - cfs_pause(cfs_time_seconds(1)); -exit4: - PJK_UT_MSG("LNetEQFree()\n"); - LNetEQFree(eqh); -exit1: - PJK_UT_MSG("LNetNiFini()\n"); - LNetNIFini(); -exit0: - if(buffer) - LIBCFS_FREE(buffer,pkt_size); - - return -1; -} /* utcli_init() */ - - -static void /*__exit*/ utcli_cleanup(void) -{ - PJK_UT_MSG(">>>\n"); - PJK_UT_MSG("<<<\n"); -} /* utcli_cleanup() */ - - -MODULE_AUTHOR("PJ Kirner (CFS)"); -MODULE_DESCRIPTION("A simple LNET Unit Test module"); -MODULE_LICENSE("GPL"); - -cfs_module(ut_cli, "1.0.0", utcli_init, utcli_cleanup); diff --git a/lnet/tests/ut_srv.c b/lnet/tests/ut_srv.c deleted file mode 100644 index 3ffbac6fea08d06a01fdb45c580fb3375ac20879..0000000000000000000000000000000000000000 --- a/lnet/tests/ut_srv.c +++ /dev/null @@ -1,144 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - - -#define UT_MSG_MODULE_NAME "utsrv " -#include "ut.h" - - -int pkt_size = 300; -module_param(pkt_size,int,S_IRUGO); -int auto_unlink=1; -module_param(auto_unlink,int,S_IRUGO); - -char *buffer = 0; -lnet_handle_eq_t eqh; -lnet_handle_me_t meh; -lnet_handle_md_t mdh; - -static int __init utsrv_init(void) -{ - int rc; - lnet_process_id_t anypid; - lnet_process_id_t mypid; - lnet_md_t md; - - PJK_UT_MSG(">>>\n"); - PJK_UT_MSG("pkt_size=%d\n",pkt_size); - PJK_UT_MSG("auto_unlink=%d\n",auto_unlink); - - PJK_UT_MSG("LIBCFS_ALLOC\n"); - LIBCFS_ALLOC (buffer, pkt_size); - if (buffer == NULL) - { - CERROR ("Unable to allocate out_buf (%d bytes)\n", pkt_size); - rc = -ENOMEM; - goto exit0; - } - - PJK_UT_MSG("LNetNiInit()\n"); - rc = LNetNIInit(0); - if (rc < 0) - { - CERROR ("LNetNIInit: error %d\n", rc); - goto exit1; - } - - LNetGetId(0,&mypid); - PJK_UT_MSG("my.nid="LPX64"\n",mypid.nid); - PJK_UT_MSG("my.pid=0x%x\n",mypid.pid); - - PJK_UT_MSG("LNetEQAlloc\n"); - rc = LNetEQAlloc( - 64, /* max number of envents why 64? 
*/ - handler, /* handler callback */ - &eqh); /* output handle */ - if(rc != 0) { - CERROR("LNetEQAlloc failed %d\n",rc); - goto exit2; - } - - anypid.nid = LNET_NID_ANY; - anypid.pid = LNET_PID_ANY; - - - PJK_UT_MSG("LNetMEAttach\n"); - rc = LNetMEAttach( - UT_PORTAL, /* ptl index*/ - anypid, /* pid - in this case allow any*/ - 0, /*matchbits*/ - 0x0FFFF, /*ignorebits - ignore botton 16-bits*/ - LNET_UNLINK, /* unlik vs LNET_RETAIN*/ - LNET_INS_BEFORE, - &meh); - if(rc != 0) { - CERROR("LNetMeAttach failed %d\n",rc); - goto exit3; - } - - md.start = buffer; - md.length = pkt_size; - md.threshold = auto_unlink ? 1 : 100; - md.max_size = 0; - md.options = 0; - md.options |= LNET_MD_OP_GET; - md.options |= LNET_MD_OP_PUT; - md.options |= LNET_MD_ACK_DISABLE; - md.user_ptr= 0; - md.eq_handle = eqh; - - PJK_UT_MSG("LNetMDAttach\n"); - rc = LNetMDAttach( - meh, - md, - LNET_UNLINK, - &mdh); - if(rc != 0){ - CERROR("LNetMDAttach failed %d\n",rc); - goto exit4; - } - - rc = 0; - goto exit0; - -exit4: - PJK_UT_MSG("LNetMEUnlink()\n"); - LNetMEUnlink(meh); -exit3: - PJK_UT_MSG("LNetEQFree()\n"); - LNetEQFree(eqh); -exit2: - PJK_UT_MSG("LNetNiFini()\n"); - LNetNIFini(); -exit1: - LIBCFS_FREE(buffer,pkt_size); -exit0: - PJK_UT_MSG("<<< rc=%d\n",rc); - return rc; - -} /* utsrv_init() */ - - -static void /*__exit*/ utsrv_cleanup(void) -{ - PJK_UT_MSG(">>>\n"); - PJK_UT_MSG("LNetMDUnlink()\n"); - LNetMDUnlink(mdh); - PJK_UT_MSG("LNetMEUnlink()\n"); - LNetMEUnlink(meh); - PJK_UT_MSG("LNetEQFree()\n"); - LNetEQFree(eqh); - PJK_UT_MSG("LNetNiFini()\n"); - LNetNIFini(); - LIBCFS_FREE(buffer,pkt_size); - PJK_UT_MSG("<<<\n"); -} /* utsrv_cleanup() */ - - -MODULE_AUTHOR("PJ Kirner (CFS)"); -MODULE_DESCRIPTION("A simple LNET Unit Test module"); -MODULE_LICENSE("GPL"); - -cfs_module(utsvr, "1.0.0", utsrv_init, utsrv_cleanup); - diff --git a/lnet/ulnds/.cvsignore b/lnet/ulnds/.cvsignore deleted file mode 100644 index 
2711a44afcdfbbe0500e2607c084999be5d87f9e..0000000000000000000000000000000000000000 --- a/lnet/ulnds/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -.deps -Makefile -autoMakefile -autoMakefile.in diff --git a/lnet/ulnds/Makefile.in b/lnet/ulnds/Makefile.in deleted file mode 100644 index 78432ee60aa2f30a99ddc3633676ada906c609fd..0000000000000000000000000000000000000000 --- a/lnet/ulnds/Makefile.in +++ /dev/null @@ -1,5 +0,0 @@ -@BUILD_USOCKLND_TRUE@subdir-m += socklnd -@BUILD_UPTLLND_TRUE@subdir-m += ptllnd - -@INCLUDE_RULES@ - diff --git a/lnet/ulnds/autoMakefile.am b/lnet/ulnds/autoMakefile.am deleted file mode 100644 index 0e7fa4c2d8b3cca9a658366bfe0cca91cbc99a52..0000000000000000000000000000000000000000 --- a/lnet/ulnds/autoMakefile.am +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -SUBDIRS = socklnd ptllnd diff --git a/lnet/ulnds/ptllnd/.cvsignore b/lnet/ulnds/ptllnd/.cvsignore deleted file mode 100644 index e9955884756af11fe171e89bf99e459ac44f1a2a..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/ulnds/ptllnd/Makefile.am b/lnet/ulnds/ptllnd/Makefile.am deleted file mode 100644 index e48cb85fa21073d8bc0ba7e515b97e0a33110fd2..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ - -if BUILD_UPTLLND -if LIBLUSTRE -noinst_LIBRARIES = libptllnd.a -noinst_HEADERS = ptllnd.h -libptllnd_a_SOURCES = ptllnd.h ptllnd.c ptllnd_cb.c -libptllnd_a_CPPFLAGS= $(LLCPPFLAGS) -# I need $(PTLNDCPPLFLAGS) to be AFTER $(CPPFLAGS) -# Adding them into $(AM_CFLAGS) seems wrong, but lets me get on.. 
-libptllnd_a_CFLAGS= $(PTLLNDCPPFLAGS) $(LLCFLAGS) -endif -endif diff --git a/lnet/ulnds/ptllnd/ptllnd.c b/lnet/ulnds/ptllnd/ptllnd.c deleted file mode 100644 index 92a436f24354716cbe0f48c1e8aa99299e99bbc5..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/ptllnd.c +++ /dev/null @@ -1,802 +0,0 @@ - -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: Eric Barton <eeb@bartonsoftware.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. 
- * - */ - -#include "ptllnd.h" - -lnd_t the_ptllnd = { - .lnd_type = PTLLND, - .lnd_startup = ptllnd_startup, - .lnd_shutdown = ptllnd_shutdown, - .lnd_ctl = ptllnd_ctl, - .lnd_send = ptllnd_send, - .lnd_recv = ptllnd_recv, - .lnd_eager_recv = ptllnd_eager_recv, - .lnd_notify = ptllnd_notify, - .lnd_wait = ptllnd_wait, - .lnd_setasync = ptllnd_setasync, -}; - -static int ptllnd_ni_count = 0; - -static struct list_head ptllnd_idle_history; -static struct list_head ptllnd_history_list; - -void -ptllnd_history_fini(void) -{ - ptllnd_he_t *he; - - while (!list_empty(&ptllnd_idle_history)) { - he = list_entry(ptllnd_idle_history.next, - ptllnd_he_t, he_list); - - list_del(&he->he_list); - LIBCFS_FREE(he, sizeof(*he)); - } - - while (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); - - list_del(&he->he_list); - LIBCFS_FREE(he, sizeof(*he)); - } -} - -int -ptllnd_history_init(void) -{ - int i; - ptllnd_he_t *he; - int n; - int rc; - - CFS_INIT_LIST_HEAD(&ptllnd_idle_history); - CFS_INIT_LIST_HEAD(&ptllnd_history_list); - - rc = ptllnd_parse_int_tunable(&n, "PTLLND_HISTORY", 0); - if (rc != 0) - return rc; - - for (i = 0; i < n; i++) { - LIBCFS_ALLOC(he, sizeof(*he)); - if (he == NULL) { - ptllnd_history_fini(); - return -ENOMEM; - } - - list_add(&he->he_list, &ptllnd_idle_history); - } - - PTLLND_HISTORY("Init"); - - return 0; -} - -void -ptllnd_history(const char *fn, const char *file, const int line, - const char *fmt, ...) 
-{ - static int seq; - - va_list ap; - ptllnd_he_t *he; - - if (!list_empty(&ptllnd_idle_history)) { - he = list_entry(ptllnd_idle_history.next, - ptllnd_he_t, he_list); - } else if (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); - } else { - return; - } - - list_del(&he->he_list); - list_add_tail(&he->he_list, &ptllnd_history_list); - - he->he_seq = seq++; - he->he_fn = fn; - he->he_file = file; - he->he_line = line; - gettimeofday(&he->he_time, NULL); - - va_start(ap, fmt); - vsnprintf(he->he_msg, sizeof(he->he_msg), fmt, ap); - va_end(ap); -} - -void -ptllnd_dump_history(void) -{ - ptllnd_he_t *he; - - PTLLND_HISTORY("dumping..."); - - while (!list_empty(&ptllnd_history_list)) { - he = list_entry(ptllnd_history_list.next, - ptllnd_he_t, he_list); - - list_del(&he->he_list); - - CDEBUG(D_WARNING, "%d %d.%06d (%s:%d:%s()) %s\n", he->he_seq, - (int)he->he_time.tv_sec, (int)he->he_time.tv_usec, - he->he_file, he->he_line, he->he_fn, he->he_msg); - - list_add_tail(&he->he_list, &ptllnd_idle_history); - } - - PTLLND_HISTORY("complete"); -} - -void -ptllnd_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux fedora 2.6.11-co-0.6.4 #1 Mon Jun 19 05:36:13 UTC 2006 i686 i686 i386 GNU - * with gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) */ - - - /* Constants... 
*/ - CLASSERT (PTL_RESERVED_MATCHBITS == 0x100); - CLASSERT (LNET_MSG_MATCHBITS == 0); - CLASSERT (PTLLND_MSG_MAGIC == 0x50746C4E); - CLASSERT (PTLLND_MSG_VERSION == 0x04); - CLASSERT (PTLLND_RDMA_OK == 0x00); - CLASSERT (PTLLND_RDMA_FAIL == 0x01); - CLASSERT (PTLLND_MSG_TYPE_INVALID == 0x00); - CLASSERT (PTLLND_MSG_TYPE_PUT == 0x01); - CLASSERT (PTLLND_MSG_TYPE_GET == 0x02); - CLASSERT (PTLLND_MSG_TYPE_IMMEDIATE == 0x03); - CLASSERT (PTLLND_MSG_TYPE_NOOP == 0x04); - CLASSERT (PTLLND_MSG_TYPE_HELLO == 0x05); - CLASSERT (PTLLND_MSG_TYPE_NAK == 0x06); - - /* Checks for struct kptl_msg_t */ - CLASSERT ((int)sizeof(kptl_msg_t) == 136); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_magic) == 0); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_magic) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_version) == 4); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_version) == 2); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_type) == 6); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_type) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_credits) == 7); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_credits) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_nob) == 8); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_nob) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_cksum) == 12); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_cksum) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcnid) == 16); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcstamp) == 24); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcstamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstnid) == 32); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dststamp) == 40); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dststamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcpid) == 48); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcpid) == 4); - CLASSERT 
((int)offsetof(kptl_msg_t, ptlm_dstpid) == 52); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstpid) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.immediate) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.immediate) == 72); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.rdma) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.rdma) == 80); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.hello) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.hello) == 12); - - /* Checks for struct kptl_immediate_msg_t */ - CLASSERT ((int)sizeof(kptl_immediate_msg_t) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_hdr) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_payload[13]) == 85); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_payload[13]) == 1); - - /* Checks for struct kptl_rdma_msg_t */ - CLASSERT ((int)sizeof(kptl_rdma_msg_t) == 80); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_hdr) == 72); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_matchbits) == 72); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_matchbits) == 8); - - /* Checks for struct kptl_hello_msg_t */ - CLASSERT ((int)sizeof(kptl_hello_msg_t) == 12); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_matchbits) == 0); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_matchbits) == 8); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_max_msg_size) == 8); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_max_msg_size) == 4); -} - -int -ptllnd_parse_int_tunable(int *value, char *name, int dflt) -{ - char *env = getenv(name); - char *end; - - if (env == NULL) { - *value = dflt; - return 0; - } - - *value = strtoull(env, &end, 0); - if (*end == 0) - return 0; - - CERROR("Can't parse tunable %s=%s\n", name, env); - return -EINVAL; -} - -int -ptllnd_get_tunables(lnet_ni_t *ni) -{ 
- ptllnd_ni_t *plni = ni->ni_data; - int max_msg_size; - int msgs_per_buffer; - int rc; - int temp; - - rc = ptllnd_parse_int_tunable(&plni->plni_portal, - "PTLLND_PORTAL", PTLLND_PORTAL); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&temp, - "PTLLND_PID", PTLLND_PID); - if (rc != 0) - return rc; - plni->plni_ptllnd_pid = (ptl_pid_t)temp; - - rc = ptllnd_parse_int_tunable(&plni->plni_peer_credits, - "PTLLND_PEERCREDITS", PTLLND_PEERCREDITS); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&max_msg_size, - "PTLLND_MAX_MSG_SIZE", - PTLLND_MAX_ULND_MSG_SIZE); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&msgs_per_buffer, - "PTLLND_MSGS_PER_BUFFER", - PTLLND_MSGS_PER_BUFFER); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_msgs_spare, - "PTLLND_MSGS_SPARE", - PTLLND_MSGS_SPARE); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_peer_hash_size, - "PTLLND_PEER_HASH_SIZE", - PTLLND_PEER_HASH_SIZE); - if (rc != 0) - return rc; - - - rc = ptllnd_parse_int_tunable(&plni->plni_eq_size, - "PTLLND_EQ_SIZE", PTLLND_EQ_SIZE); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_checksum, - "PTLLND_CHECKSUM", 0); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_max_tx_history, - "PTLLND_TX_HISTORY", PTLLND_TX_HISTORY); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_abort_on_nak, - "PTLLND_ABORT_ON_NAK", - PTLLND_ABORT_ON_NAK); - if (rc != 0) - return rc; - - rc = ptllnd_parse_int_tunable(&plni->plni_dump_on_nak, - "PTLLND_DUMP_ON_NAK", - PTLLND_DUMP_ON_NAK); - if (rc != 0) - return rc; - - plni->plni_max_msg_size = max_msg_size & ~7; - if (plni->plni_max_msg_size < PTLLND_MIN_BUFFER_SIZE) - plni->plni_max_msg_size = PTLLND_MIN_BUFFER_SIZE; - CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0); - CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE); - - plni->plni_buffer_size = plni->plni_max_msg_size * 
msgs_per_buffer; - - CDEBUG(D_NET, "portal = %d\n",plni->plni_portal); - CDEBUG(D_NET, "ptllnd_pid = %d\n",plni->plni_ptllnd_pid); - CDEBUG(D_NET, "max_msg_size = %d\n",max_msg_size); - CDEBUG(D_NET, "msgs_per_buffer = %d\n",msgs_per_buffer); - CDEBUG(D_NET, "msgs_spare = %d\n",plni->plni_msgs_spare); - CDEBUG(D_NET, "peer_hash_size = %d\n",plni->plni_peer_hash_size); - CDEBUG(D_NET, "eq_size = %d\n",plni->plni_eq_size); - CDEBUG(D_NET, "max_msg_size = %d\n",plni->plni_max_msg_size); - CDEBUG(D_NET, "buffer_size = %d\n",plni->plni_buffer_size); - - return 0; -} - -ptllnd_buffer_t * -ptllnd_create_buffer (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_buffer_t *buf; - - LIBCFS_ALLOC(buf, sizeof(*buf)); - if (buf == NULL) { - CERROR("Can't allocate buffer descriptor\n"); - return NULL; - } - - buf->plb_ni = ni; - buf->plb_posted = 0; - CFS_INIT_LIST_HEAD(&buf->plb_list); - - LIBCFS_ALLOC(buf->plb_buffer, plni->plni_buffer_size); - if (buf->plb_buffer == NULL) { - CERROR("Can't allocate buffer size %d\n", - plni->plni_buffer_size); - LIBCFS_FREE(buf, sizeof(*buf)); - return NULL; - } - - list_add(&buf->plb_list, &plni->plni_buffers); - plni->plni_nbuffers++; - - return buf; -} - -void -ptllnd_destroy_buffer (ptllnd_buffer_t *buf) -{ - ptllnd_ni_t *plni = buf->plb_ni->ni_data; - - LASSERT (!buf->plb_posted); - - plni->plni_nbuffers--; - list_del(&buf->plb_list); - LIBCFS_FREE(buf->plb_buffer, plni->plni_buffer_size); - LIBCFS_FREE(buf, sizeof(*buf)); -} - -int -ptllnd_size_buffers (lnet_ni_t *ni, int delta) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_buffer_t *buf; - int nmsgs; - int nbufs; - int rc; - - CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers); - CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers); - - plni->plni_nmsgs += delta; - LASSERT(plni->plni_nmsgs >= 0); - - nmsgs = plni->plni_nmsgs + plni->plni_msgs_spare; - - nbufs = (nmsgs * plni->plni_max_msg_size + plni->plni_buffer_size - 1) / - 
plni->plni_buffer_size; - - while (nbufs > plni->plni_nbuffers) { - buf = ptllnd_create_buffer(ni); - - if (buf == NULL) - return -ENOMEM; - - rc = ptllnd_post_buffer(buf); - if (rc != 0) { - /* TODO - this path seems to orpahn the buffer - * in a state where its not posted and will never be - * However it does not leak the buffer as it's - * already been put onto the global buffer list - * and will be cleaned up - */ - return rc; - } - } - - CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers); - CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers); - return 0; -} - -void -ptllnd_destroy_buffers (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_buffer_t *buf; - struct list_head *tmp; - struct list_head *nxt; - - CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers); - CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers); - - list_for_each_safe(tmp, nxt, &plni->plni_buffers) { - buf = list_entry(tmp, ptllnd_buffer_t, plb_list); - - //CDEBUG(D_NET, "buf=%p posted=%d\n",buf,buf->plb_posted); - - LASSERT (plni->plni_nbuffers > 0); - if (buf->plb_posted) { - time_t start = cfs_time_current_sec(); - int w = PTLLND_WARN_LONG_WAIT; - - LASSERT (plni->plni_nposted_buffers > 0); - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - (void) PtlMDUnlink(buf->plb_md); - - while (buf->plb_posted) { - if (cfs_time_current_sec() > start + w) { - CWARN("Waited %ds to unlink buffer\n", w); - w *= 2; - } - ptllnd_wait(ni, w*1000); - } -#else - while (buf->plb_posted) { - rc = PtlMDUnlink(buf->plb_md); - if (rc == PTL_OK) { - buf->plb_posted = 0; - plni->plni_nposted_buffers--; - break; - } - LASSERT (rc == PTL_MD_IN_USE); - if (cfs_time_current_sec() > start + w) { - CWARN("Waited %ds to unlink buffer\n", w); - w *= 2; - } - ptllnd_wait(ni, w*1000); - } -#endif - } - ptllnd_destroy_buffer(buf); - } - - CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers); - CDEBUG(D_NET, "nbuffers = %d 
(after)\n",plni->plni_nbuffers); - - LASSERT (plni->plni_nposted_buffers == 0); - LASSERT (plni->plni_nbuffers == 0); -} - -int -ptllnd_create_peer_hash (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - int i; - - plni->plni_npeers = 0; - - LIBCFS_ALLOC(plni->plni_peer_hash, - plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash)); - if (plni->plni_peer_hash == NULL) { - CERROR("Can't allocate ptllnd peer hash (size %d)\n", - plni->plni_peer_hash_size); - return -ENOMEM; - } - - for (i = 0; i < plni->plni_peer_hash_size; i++) - CFS_INIT_LIST_HEAD(&plni->plni_peer_hash[i]); - - return 0; -} - -void -ptllnd_destroy_peer_hash (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - int i; - - LASSERT( plni->plni_npeers == 0); - - for (i = 0; i < plni->plni_peer_hash_size; i++) - LASSERT (list_empty(&plni->plni_peer_hash[i])); - - LIBCFS_FREE(plni->plni_peer_hash, - plni->plni_peer_hash_size * sizeof(*plni->plni_peer_hash)); -} - -void -ptllnd_close_peers (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_peer_t *plp; - int i; - - for (i = 0; i < plni->plni_peer_hash_size; i++) - while (!list_empty(&plni->plni_peer_hash[i])) { - plp = list_entry(plni->plni_peer_hash[i].next, - ptllnd_peer_t, plp_list); - - ptllnd_close_peer(plp, 0); - } -} - -int -ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - switch (cmd) { - case IOC_LIBCFS_DEBUG_PEER: - ptllnd_debug_peer(ni, *((lnet_process_id_t *)arg)); - return 0; - - default: - return -EINVAL; - } -} - -__u64 -ptllnd_get_timestamp(void) -{ - struct timeval tv; - int rc = gettimeofday(&tv, NULL); - - LASSERT (rc == 0); - return ((__u64)tv.tv_sec) * 1000000 + tv.tv_usec; -} - -void -ptllnd_shutdown (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - int rc; - time_t start = cfs_time_current_sec(); - int w = PTLLND_WARN_LONG_WAIT; - - LASSERT (ptllnd_ni_count == 1); - plni->plni_max_tx_history = 0; - - ptllnd_cull_tx_history(plni); - - ptllnd_close_peers(ni); - ptllnd_destroy_buffers(ni); - - 
while (plni->plni_npeers > 0) { - if (cfs_time_current_sec() > start + w) { - CWARN("Waited %ds for peers to shutdown\n", w); - w *= 2; - } - ptllnd_wait(ni, w*1000); - } - - LASSERT (plni->plni_ntxs == 0); - LASSERT (plni->plni_nrxs == 0); - - rc = PtlEQFree(plni->plni_eqh); - LASSERT (rc == PTL_OK); - - rc = PtlNIFini(plni->plni_nih); - LASSERT (rc == PTL_OK); - - ptllnd_destroy_peer_hash(ni); - LIBCFS_FREE(plni, sizeof(*plni)); - ptllnd_ni_count--; -} - -int -ptllnd_startup (lnet_ni_t *ni) -{ - ptllnd_ni_t *plni; - int rc; - - /* could get limits from portals I guess... */ - ni->ni_maxtxcredits = - ni->ni_peertxcredits = 1000; - - if (ptllnd_ni_count != 0) { - CERROR("Can't have > 1 instance of ptllnd\n"); - return -EPERM; - } - - ptllnd_ni_count++; - - rc = ptllnd_history_init(); - if (rc != 0) { - CERROR("Can't init history\n"); - goto failed0; - } - - LIBCFS_ALLOC(plni, sizeof(*plni)); - if (plni == NULL) { - CERROR("Can't allocate ptllnd state\n"); - rc = -ENOMEM; - goto failed0; - } - - ni->ni_data = plni; - - plni->plni_stamp = ptllnd_get_timestamp(); - plni->plni_nrxs = 0; - plni->plni_ntxs = 0; - plni->plni_ntx_history = 0; - CFS_INIT_LIST_HEAD(&plni->plni_zombie_txs); - CFS_INIT_LIST_HEAD(&plni->plni_tx_history); - - /* - * Initilize buffer related data structures - */ - CFS_INIT_LIST_HEAD(&plni->plni_buffers); - plni->plni_nbuffers = 0; - plni->plni_nposted_buffers = 0; - - rc = ptllnd_get_tunables(ni); - if (rc != 0) - goto failed1; - - rc = ptllnd_create_peer_hash(ni); - if (rc != 0) - goto failed1; - - /* NB I most probably won't get the PID I requested here. It doesn't - * matter because I don't need a fixed PID (only connection acceptors - * need a "well known" PID). 
*/ - - rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid, - NULL, NULL, &plni->plni_nih); - if (rc != PTL_OK && rc != PTL_IFACE_DUP) { - CERROR("PtlNIInit failed: %d\n", rc); - rc = -ENODEV; - goto failed2; - } - - rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size, - PTL_EQ_HANDLER_NONE, &plni->plni_eqh); - if (rc != PTL_OK) { - CERROR("PtlEQAlloc failed: %d\n", rc); - rc = -ENODEV; - goto failed3; - } - - /* - * Fetch the Portals NID - */ - if(rc != PtlGetId(plni->plni_nih,&plni->plni_portals_id)){ - CERROR ("PtlGetID failed : %d\n", rc); - rc = -EINVAL; - goto failed4; - } - - CDEBUG(D_NET, "lnet nid=" LPX64 " (passed in)\n",ni->ni_nid); - - /* - * Create the new NID. Based on the LND network type - * and the lower ni's address data. - */ - ni->ni_nid = ptllnd_ptl2lnetnid(ni, plni->plni_portals_id.nid); - - CDEBUG(D_NET, "ptl id =%s\n", ptllnd_ptlid2str(plni->plni_portals_id)); - CDEBUG(D_NET, "lnet id =%s (passed back)\n", - libcfs_id2str((lnet_process_id_t) { - .nid = ni->ni_nid, .pid = the_lnet.ln_pid})); - - rc = ptllnd_size_buffers(ni, 0); - if (rc != 0) - goto failed4; - - return 0; - - failed4: - ptllnd_destroy_buffers(ni); - PtlEQFree(plni->plni_eqh); - failed3: - PtlNIFini(plni->plni_nih); - failed2: - ptllnd_destroy_peer_hash(ni); - failed1: - LIBCFS_FREE(plni, sizeof(*plni)); - failed0: - ptllnd_history_fini(); - ptllnd_ni_count--; - CDEBUG(D_NET, "<<< rc=%d\n",rc); - return rc; -} - -const char *ptllnd_evtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_EVENT_GET_START); - DO_TYPE(PTL_EVENT_GET_END); - DO_TYPE(PTL_EVENT_PUT_START); - DO_TYPE(PTL_EVENT_PUT_END); - DO_TYPE(PTL_EVENT_REPLY_START); - DO_TYPE(PTL_EVENT_REPLY_END); - DO_TYPE(PTL_EVENT_ACK); - DO_TYPE(PTL_EVENT_SEND_START); - DO_TYPE(PTL_EVENT_SEND_END); - DO_TYPE(PTL_EVENT_UNLINK); - default: - return "<unknown event type>"; - } -#undef DO_TYPE -} - -const char *ptllnd_msgtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - 
switch(type) - { - DO_TYPE(PTLLND_MSG_TYPE_INVALID); - DO_TYPE(PTLLND_MSG_TYPE_PUT); - DO_TYPE(PTLLND_MSG_TYPE_GET); - DO_TYPE(PTLLND_MSG_TYPE_IMMEDIATE); - DO_TYPE(PTLLND_MSG_TYPE_HELLO); - DO_TYPE(PTLLND_MSG_TYPE_NOOP); - DO_TYPE(PTLLND_MSG_TYPE_NAK); - default: - return "<unknown msg type>"; - } -#undef DO_TYPE -} - -const char *ptllnd_errtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_OK); - DO_TYPE(PTL_SEGV); - DO_TYPE(PTL_NO_SPACE); - DO_TYPE(PTL_ME_IN_USE); - DO_TYPE(PTL_NAL_FAILED); - DO_TYPE(PTL_NO_INIT); - DO_TYPE(PTL_IFACE_DUP); - DO_TYPE(PTL_IFACE_INVALID); - DO_TYPE(PTL_HANDLE_INVALID); - DO_TYPE(PTL_MD_INVALID); - DO_TYPE(PTL_ME_INVALID); - DO_TYPE(PTL_PROCESS_INVALID); - DO_TYPE(PTL_PT_INDEX_INVALID); - DO_TYPE(PTL_SR_INDEX_INVALID); - DO_TYPE(PTL_EQ_INVALID); - DO_TYPE(PTL_EQ_DROPPED); - DO_TYPE(PTL_EQ_EMPTY); - DO_TYPE(PTL_MD_NO_UPDATE); - DO_TYPE(PTL_FAIL); - DO_TYPE(PTL_AC_INDEX_INVALID); - DO_TYPE(PTL_MD_ILLEGAL); - DO_TYPE(PTL_ME_LIST_TOO_LONG); - DO_TYPE(PTL_MD_IN_USE); - DO_TYPE(PTL_NI_INVALID); - DO_TYPE(PTL_PID_INVALID); - DO_TYPE(PTL_PT_FULL); - DO_TYPE(PTL_VAL_FAILED); - DO_TYPE(PTL_NOT_IMPLEMENTED); - DO_TYPE(PTL_NO_ACK); - DO_TYPE(PTL_EQ_IN_USE); - DO_TYPE(PTL_PID_IN_USE); - DO_TYPE(PTL_INV_EQ_SIZE); - DO_TYPE(PTL_AGAIN); - default: - return "<unknown error type>"; - } -#undef DO_TYPE -} diff --git a/lnet/ulnds/ptllnd/ptllnd.h b/lnet/ulnds/ptllnd/ptllnd.h deleted file mode 100644 index fa71506107eeea8b2fefb4c3079162d390369072..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/ptllnd.h +++ /dev/null @@ -1,302 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: Eric Barton <eeb@bartonsoftware.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. 
- * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - - -#define DEBUG_SUBSYSTEM S_LND - -#include <lnet/lib-lnet.h> -#include <lnet/ptllnd_wire.h> - -#include <portals/p30.h> -#include <lnet/ptllnd.h> /* Depends on portals/p30.h */ -#include <stdarg.h> - -#define PTLLND_DEBUG_TIMING 0 - -#define PTLLND_MSGS_PER_BUFFER 64 -#define PTLLND_MSGS_SPARE 256 -#define PTLLND_PEER_HASH_SIZE 101 -#define PTLLND_EQ_SIZE 1024 -#if PTLLND_DEBUG_TIMING -# define PTLLND_TX_HISTORY 1024 -#else -# define PTLLND_TX_HISTORY 0 -#endif -#define PTLLND_WARN_LONG_WAIT 5 /* seconds */ -#define PTLLND_ABORT_ON_NAK 1 /* abort app on (e.g.) protocol version mismatch */ -#define PTLLND_DUMP_ON_NAK 0 /* dump debug? */ - - -/* Hack to record history - * This should really be done by CDEBUG(D_NETTRACE... */ - -typedef struct { - struct list_head he_list; - struct timeval he_time; - const char *he_fn; - const char *he_file; - int he_seq; - int he_line; - char he_msg[80]; -} ptllnd_he_t; - -void ptllnd_dump_history(); -void ptllnd_history(const char *fn, const char *file, const int line, - const char *fmt, ...); -#define PTLLND_HISTORY(fmt, a...) 
\ - ptllnd_history(__FUNCTION__, __FILE__, __LINE__, fmt, ## a) - - -#define PTLLND_MD_OPTIONS (PTL_MD_LUSTRE_COMPLETION_SEMANTICS |\ - PTL_MD_EVENT_START_DISABLE) -typedef struct -{ - int plni_portal; - ptl_pid_t plni_ptllnd_pid; /* Portals PID of peers I may connect to */ - int plni_peer_credits; - int plni_max_msg_size; - int plni_buffer_size; - int plni_msgs_spare; - int plni_peer_hash_size; - int plni_eq_size; - int plni_checksum; - int plni_max_tx_history; - int plni_abort_on_nak; - int plni_dump_on_nak; - - __u64 plni_stamp; - struct list_head plni_active_txs; - struct list_head plni_zombie_txs; - int plni_ntxs; - int plni_nrxs; - - ptl_handle_ni_t plni_nih; - ptl_handle_eq_t plni_eqh; - ptl_process_id_t plni_portals_id; /* Portals ID of interface */ - - struct list_head *plni_peer_hash; - int plni_npeers; - - struct list_head plni_tx_history; - int plni_ntx_history; - - struct list_head plni_buffers; - int plni_nbuffers; - int plni_nposted_buffers; - int plni_nmsgs; -} ptllnd_ni_t; - -#define PTLLND_CREDIT_HIGHWATER(plni) ((plni)->plni_peer_credits - 1) - -typedef struct -{ - struct list_head plp_list; - lnet_ni_t *plp_ni; - lnet_process_id_t plp_id; - ptl_process_id_t plp_ptlid; - int plp_credits; /* # msg buffers reserved for me at peer */ - - /* credits for msg buffers I've posted for this peer... - * outstanding - free buffers I've still to inform my peer about - * sent - free buffers I've told my peer about - * lazy - additional buffers (over and above plni_peer_credits) - * posted to prevent peer blocking on sending a non-RDMA - * messages to me when LNET isn't eagerly responsive to - * the network (i.e. liblustre doesn't have control). - * extra_lazy - lazy credits not required any more. 
*/ - int plp_outstanding_credits; - int plp_sent_credits; - int plp_lazy_credits; - int plp_extra_lazy_credits; - - int plp_max_msg_size; - int plp_refcount; - int plp_recvd_hello:1; - int plp_closing:1; - __u64 plp_match; - __u64 plp_stamp; - struct list_head plp_txq; - struct list_head plp_activeq; -} ptllnd_peer_t; - -typedef struct -{ - struct list_head plb_list; - lnet_ni_t *plb_ni; - int plb_posted; - ptl_handle_md_t plb_md; - char *plb_buffer; -} ptllnd_buffer_t; - -typedef struct -{ - ptllnd_peer_t *rx_peer; - kptl_msg_t *rx_msg; - int rx_nob; -} ptllnd_rx_t; - -typedef struct -{ - struct list_head tx_list; - int tx_type; - int tx_status; - ptllnd_peer_t *tx_peer; - lnet_msg_t *tx_lnetmsg; - lnet_msg_t *tx_lnetreplymsg; - unsigned int tx_niov; - ptl_md_iovec_t *tx_iov; - ptl_handle_md_t tx_bulkmdh; - ptl_handle_md_t tx_reqmdh; -#if PTLLND_DEBUG_TIMING - struct timeval tx_bulk_posted; - struct timeval tx_bulk_done; - struct timeval tx_req_posted; - struct timeval tx_req_done; -#endif - int tx_completing; /* someone already completing */ - int tx_msgsize; /* # bytes in tx_msg */ - kptl_msg_t tx_msg; /* message to send */ -} ptllnd_tx_t; - -#define PTLLND_RDMA_WRITE 0x100 /* pseudo message type */ -#define PTLLND_RDMA_READ 0x101 /* (no msg actually sent) */ - -/* Hack to extract object type from event's user_ptr relies on (and checks) - * that structs are somewhat aligned. 
*/ -#define PTLLND_EVENTARG_TYPE_TX 0x1 -#define PTLLND_EVENTARG_TYPE_BUF 0x2 -#define PTLLND_EVENTARG_TYPE_MASK 0x3 - -static inline void * -ptllnd_obj2eventarg (void *obj, int type) -{ - unsigned long ptr = (unsigned long)obj; - - LASSERT ((ptr & PTLLND_EVENTARG_TYPE_MASK) == 0); - LASSERT ((type & ~PTLLND_EVENTARG_TYPE_MASK) == 0); - - return (void *)(ptr | type); -} - -static inline int -ptllnd_eventarg2type (void *arg) -{ - unsigned long ptr = (unsigned long)arg; - - return (ptr & PTLLND_EVENTARG_TYPE_MASK); -} - -static inline void * -ptllnd_eventarg2obj (void *arg) -{ - unsigned long ptr = (unsigned long)arg; - - return (void *)(ptr & ~PTLLND_EVENTARG_TYPE_MASK); -} - -#if PTLLND_DEBUG_TIMING -# define PTLLND_DBGT_INIT(tv) memset(&(tv), 0, sizeof(tv)) -# define PTLLND_DBGT_STAMP(tv) gettimeofday(&(tv), NULL) -# define DBGT_FMT "%ld.%06ld" -# define DBGT_ARGS(tv) , (long)((tv).tv_sec), (long)((tv).tv_usec) -#else -# define PTLLND_DBGT_INIT(tv) -# define PTLLND_DBGT_STAMP(tv) -# define DBGT_FMT "-" -# define DBGT_ARGS(tv) -#endif - -int ptllnd_parse_int_tunable(int *value, char *name, int dflt); -void ptllnd_cull_tx_history(ptllnd_ni_t *plni); -int ptllnd_startup(lnet_ni_t *ni); -void ptllnd_shutdown(lnet_ni_t *ni); -int ptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -int ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg); -int ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, - void **new_privatep); - -ptllnd_tx_t *ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob); -void ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive); -int ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int n); -void ptllnd_wait(lnet_ni_t *ni, int milliseconds); -void ptllnd_check_sends(ptllnd_peer_t *peer); -void 
ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id); -void ptllnd_destroy_peer(ptllnd_peer_t *peer); -void ptllnd_close_peer(ptllnd_peer_t *peer, int error); -int ptllnd_post_buffer(ptllnd_buffer_t *buf); -int ptllnd_size_buffers (lnet_ni_t *ni, int delta); -const char *ptllnd_evtype2str(int type); -const char *ptllnd_msgtype2str(int type); -const char *ptllnd_errtype2str(int type); -char *ptllnd_ptlid2str(ptl_process_id_t id); - -static inline void -ptllnd_peer_addref (ptllnd_peer_t *peer) -{ - LASSERT (peer->plp_refcount > 0); - peer->plp_refcount++; -} - -static inline void -ptllnd_peer_decref (ptllnd_peer_t *peer) -{ - LASSERT (peer->plp_refcount > 0); - peer->plp_refcount--; - if (peer->plp_refcount == 0) - ptllnd_destroy_peer(peer); -} - -static inline void -ptllnd_post_tx(ptllnd_tx_t *tx) -{ - ptllnd_peer_t *peer = tx->tx_peer; - LASSERT(tx->tx_peer != NULL); - list_add_tail(&tx->tx_list, &peer->plp_txq); - ptllnd_check_sends(peer); -} - -static inline lnet_nid_t -ptllnd_ptl2lnetnid(lnet_ni_t *ni, ptl_nid_t portals_nid) -{ - return LNET_MKNID(LNET_NIDNET(ni->ni_nid), portals_nid); -} - -static inline ptl_nid_t -ptllnd_lnet2ptlnid(lnet_nid_t lnet_nid) -{ - return LNET_NIDADDR(lnet_nid); -} - -/* - * A note about lprintf(): - * Normally printf() is redirected to stdout of the console - * from which yod launched the catamount application. However - * there is a lot of initilziation code that runs before this - * redirection is hooked up, and printf() seems to go to the bit bucket - * - * To get any kind of debug output and init time lprintf() can - * be used to output to the console from which bookqk was used to - * boot the catamount node. This works for debugging some simple - * cases. 
- */ - - diff --git a/lnet/ulnds/ptllnd/ptllnd_cb.c b/lnet/ulnds/ptllnd/ptllnd_cb.c deleted file mode 100644 index 96b0345cbf1d8bf8264335a4ff8d86704647d8dc..0000000000000000000000000000000000000000 --- a/lnet/ulnds/ptllnd/ptllnd_cb.c +++ /dev/null @@ -1,1756 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2005 Cluster File Systems, Inc. All rights reserved. - * Author: Eric Barton <eeb@bartonsoftware.com> - * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. - * - * This file is confidential source code owned by Cluster File Systems. - * No viewing, modification, compilation, redistribution, or any other - * form of use is permitted except through a signed license agreement. - * - * If you have not signed such an agreement, then you have no rights to - * this file. Please destroy it immediately and contact CFS. - * - */ - -#include "ptllnd.h" - -char * -ptllnd_ptlid2str(ptl_process_id_t id) -{ - static char strs[8][32]; - static int idx = 0; - - char *str = strs[idx++]; - - if (idx >= sizeof(strs)/sizeof(strs[0])) - idx = 0; - - snprintf(str, sizeof(strs[0]), FMT_PTLID, id.pid, id.nid); - return str; -} - -void -ptllnd_destroy_peer(ptllnd_peer_t *peer) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - int nmsg = peer->plp_lazy_credits + - plni->plni_peer_credits; - - ptllnd_size_buffers(ni, -nmsg); - - LASSERT (peer->plp_closing); - LASSERT (plni->plni_npeers > 0); - LASSERT (list_empty(&peer->plp_txq)); - LASSERT (list_empty(&peer->plp_activeq)); - plni->plni_npeers--; - LIBCFS_FREE(peer, sizeof(*peer)); -} - -void -ptllnd_abort_txs(ptllnd_ni_t *plni, struct list_head *q) -{ - while (!list_empty(q)) { - ptllnd_tx_t *tx = list_entry(q->next, ptllnd_tx_t, tx_list); - - tx->tx_status = -ESHUTDOWN; - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &plni->plni_zombie_txs); - } -} 
- -void -ptllnd_close_peer(ptllnd_peer_t *peer, int error) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - - if (peer->plp_closing) - return; - - peer->plp_closing = 1; - - if (!list_empty(&peer->plp_txq) || - !list_empty(&peer->plp_activeq) || - error != 0) { - CERROR("Closing %s\n", libcfs_id2str(peer->plp_id)); - ptllnd_debug_peer(ni, peer->plp_id); - } - - ptllnd_abort_txs(plni, &peer->plp_txq); - ptllnd_abort_txs(plni, &peer->plp_activeq); - - list_del(&peer->plp_list); - ptllnd_peer_decref(peer); -} - -ptllnd_peer_t * -ptllnd_find_peer(lnet_ni_t *ni, lnet_process_id_t id, int create) -{ - ptllnd_ni_t *plni = ni->ni_data; - unsigned int hash = LNET_NIDADDR(id.nid) % plni->plni_peer_hash_size; - struct list_head *tmp; - ptllnd_peer_t *plp; - ptllnd_tx_t *tx; - int rc; - - LASSERT (LNET_NIDNET(id.nid) == LNET_NIDNET(ni->ni_nid)); - - list_for_each(tmp, &plni->plni_peer_hash[hash]) { - plp = list_entry(tmp, ptllnd_peer_t, plp_list); - - if (plp->plp_id.nid == id.nid && - plp->plp_id.pid == id.pid) { - ptllnd_peer_addref(plp); - return plp; - } - } - - if (!create) - return NULL; - - /* New peer: check first for enough posted buffers */ - plni->plni_npeers++; - rc = ptllnd_size_buffers(ni, plni->plni_peer_credits); - if (rc != 0) { - plni->plni_npeers--; - return NULL; - } - - LIBCFS_ALLOC(plp, sizeof(*plp)); - if (plp == NULL) { - CERROR("Can't allocate new peer %s\n", libcfs_id2str(id)); - plni->plni_npeers--; - ptllnd_size_buffers(ni, -plni->plni_peer_credits); - return NULL; - } - - plp->plp_ni = ni; - plp->plp_id = id; - plp->plp_ptlid.nid = LNET_NIDADDR(id.nid); - plp->plp_ptlid.pid = plni->plni_ptllnd_pid; - plp->plp_credits = 1; /* add more later when she gives me credits */ - plp->plp_max_msg_size = plni->plni_max_msg_size; /* until I hear from her */ - plp->plp_sent_credits = 1; /* Implicit credit for HELLO */ - plp->plp_outstanding_credits = plni->plni_peer_credits - 1; - plp->plp_lazy_credits = 0; - plp->plp_extra_lazy_credits = 
0; - plp->plp_match = 0; - plp->plp_stamp = 0; - plp->plp_recvd_hello = 0; - plp->plp_closing = 0; - plp->plp_refcount = 1; - CFS_INIT_LIST_HEAD(&plp->plp_list); - CFS_INIT_LIST_HEAD(&plp->plp_txq); - CFS_INIT_LIST_HEAD(&plp->plp_activeq); - - ptllnd_peer_addref(plp); - list_add_tail(&plp->plp_list, &plni->plni_peer_hash[hash]); - - tx = ptllnd_new_tx(plp, PTLLND_MSG_TYPE_HELLO, 0); - if (tx == NULL) { - CERROR("Can't send HELLO to %s\n", libcfs_id2str(id)); - ptllnd_close_peer(plp, -ENOMEM); - ptllnd_peer_decref(plp); - return NULL; - } - - tx->tx_msg.ptlm_u.hello.kptlhm_matchbits = PTL_RESERVED_MATCHBITS; - tx->tx_msg.ptlm_u.hello.kptlhm_max_msg_size = plni->plni_max_msg_size; - - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post hello %p", libcfs_id2str(id), - tx->tx_peer->plp_credits, - tx->tx_peer->plp_outstanding_credits, - tx->tx_peer->plp_sent_credits, - plni->plni_peer_credits + - tx->tx_peer->plp_lazy_credits, tx); - ptllnd_post_tx(tx); - - return plp; -} - -int -ptllnd_count_q(struct list_head *q) -{ - struct list_head *e; - int n = 0; - - list_for_each(e, q) { - n++; - } - - return n; -} - -const char * -ptllnd_tx_typestr(int type) -{ - switch (type) { - case PTLLND_RDMA_WRITE: - return "rdma_write"; - - case PTLLND_RDMA_READ: - return "rdma_read"; - - case PTLLND_MSG_TYPE_PUT: - return "put_req"; - - case PTLLND_MSG_TYPE_GET: - return "get_req"; - - case PTLLND_MSG_TYPE_IMMEDIATE: - return "immediate"; - - case PTLLND_MSG_TYPE_NOOP: - return "noop"; - - case PTLLND_MSG_TYPE_HELLO: - return "hello"; - - default: - return "<unknown>"; - } -} - -void -ptllnd_debug_tx(ptllnd_tx_t *tx) -{ - CDEBUG(D_WARNING, "%s %s b "DBGT_FMT"/"DBGT_FMT - " r "DBGT_FMT"/"DBGT_FMT" status %d\n", - ptllnd_tx_typestr(tx->tx_type), - libcfs_id2str(tx->tx_peer->plp_id) - DBGT_ARGS(tx->tx_bulk_posted) DBGT_ARGS(tx->tx_bulk_done) - DBGT_ARGS(tx->tx_req_posted) DBGT_ARGS(tx->tx_req_done), - tx->tx_status); -} - -void -ptllnd_debug_peer(lnet_ni_t *ni, lnet_process_id_t id) -{ - ptllnd_peer_t 
*plp = ptllnd_find_peer(ni, id, 0); - struct list_head *tmp; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx; - - if (plp == NULL) { - CDEBUG(D_WARNING, "No peer %s\n", libcfs_id2str(id)); - return; - } - - CDEBUG(D_WARNING, "%s %s%s [%d] "LPD64".%06d m "LPD64" q %d/%d c %d/%d+%d(%d)\n", - libcfs_id2str(id), - plp->plp_recvd_hello ? "H" : "_", - plp->plp_closing ? "C" : "_", - plp->plp_refcount, - plp->plp_stamp / 1000000, (int)(plp->plp_stamp % 1000000), - plp->plp_match, - ptllnd_count_q(&plp->plp_txq), - ptllnd_count_q(&plp->plp_activeq), - plp->plp_credits, plp->plp_outstanding_credits, plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits); - - CDEBUG(D_WARNING, "txq:\n"); - list_for_each (tmp, &plp->plp_txq) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - ptllnd_debug_tx(tx); - } - - CDEBUG(D_WARNING, "activeq:\n"); - list_for_each (tmp, &plp->plp_activeq) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - ptllnd_debug_tx(tx); - } - - CDEBUG(D_WARNING, "zombies:\n"); - list_for_each (tmp, &plni->plni_zombie_txs) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - if (tx->tx_peer->plp_id.nid == id.nid && - tx->tx_peer->plp_id.pid == id.pid) - ptllnd_debug_tx(tx); - } - - CDEBUG(D_WARNING, "history:\n"); - list_for_each (tmp, &plni->plni_tx_history) { - tx = list_entry(tmp, ptllnd_tx_t, tx_list); - - if (tx->tx_peer->plp_id.nid == id.nid && - tx->tx_peer->plp_id.pid == id.pid) - ptllnd_debug_tx(tx); - } - - ptllnd_peer_decref(plp); - ptllnd_dump_history(); -} - -void -ptllnd_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive) -{ - lnet_process_id_t id; - ptllnd_peer_t *peer; - time_t start = cfs_time_current_sec(); - int w = PTLLND_WARN_LONG_WAIT; - - /* This is only actually used to connect to routers at startup! 
*/ - if (!alive) { - LBUG(); - return; - } - - id.nid = nid; - id.pid = LUSTRE_SRV_LNET_PID; - - peer = ptllnd_find_peer(ni, id, 1); - if (peer == NULL) - return; - - /* wait for the peer to reply */ - while (!peer->plp_recvd_hello) { - if (cfs_time_current_sec() > start + w) { - CWARN("Waited %ds to connect to %s\n", - w, libcfs_id2str(id)); - w *= 2; - } - - ptllnd_wait(ni, w*1000); - } - - ptllnd_peer_decref(peer); -} - -int -ptllnd_setasync(lnet_ni_t *ni, lnet_process_id_t id, int nasync) -{ - ptllnd_peer_t *peer = ptllnd_find_peer(ni, id, nasync > 0); - int rc; - - if (peer == NULL) - return -ENOMEM; - - LASSERT (peer->plp_lazy_credits >= 0); - LASSERT (peer->plp_extra_lazy_credits >= 0); - - /* If nasync < 0, we're being told we can reduce the total message - * headroom. We can't do this right now because our peer might already - * have credits for the extra buffers, so we just account the extra - * headroom in case we need it later and only destroy buffers when the - * peer closes. - * - * Note that the following condition handles this case, where it - * actually increases the extra lazy credit counter. */ - - if (nasync <= peer->plp_extra_lazy_credits) { - peer->plp_extra_lazy_credits -= nasync; - return 0; - } - - LASSERT (nasync > 0); - - nasync -= peer->plp_extra_lazy_credits; - peer->plp_extra_lazy_credits = 0; - - rc = ptllnd_size_buffers(ni, nasync); - if (rc == 0) { - peer->plp_lazy_credits += nasync; - peer->plp_outstanding_credits += nasync; - } - - return rc; -} - -__u32 -ptllnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 
1 : sum; -} - -ptllnd_tx_t * -ptllnd_new_tx(ptllnd_peer_t *peer, int type, int payload_nob) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx; - int msgsize; - - CDEBUG(D_NET, "peer=%p type=%d payload=%d\n", peer, type, payload_nob); - - switch (type) { - default: - LBUG(); - - case PTLLND_RDMA_WRITE: - case PTLLND_RDMA_READ: - LASSERT (payload_nob == 0); - msgsize = 0; - break; - - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - LASSERT (payload_nob == 0); - msgsize = offsetof(kptl_msg_t, ptlm_u) + - sizeof(kptl_rdma_msg_t); - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - msgsize = offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload[payload_nob]); - break; - - case PTLLND_MSG_TYPE_NOOP: - LASSERT (payload_nob == 0); - msgsize = offsetof(kptl_msg_t, ptlm_u); - break; - - case PTLLND_MSG_TYPE_HELLO: - LASSERT (payload_nob == 0); - msgsize = offsetof(kptl_msg_t, ptlm_u) + - sizeof(kptl_hello_msg_t); - break; - } - - msgsize = (msgsize + 7) & ~7; - LASSERT (msgsize <= peer->plp_max_msg_size); - - LIBCFS_ALLOC(tx, offsetof(ptllnd_tx_t, tx_msg) + msgsize); - - if (tx == NULL) { - CERROR("Can't allocate msg type %d for %s\n", - type, libcfs_id2str(peer->plp_id)); - return NULL; - } - - CFS_INIT_LIST_HEAD(&tx->tx_list); - tx->tx_peer = peer; - tx->tx_type = type; - tx->tx_lnetmsg = tx->tx_lnetreplymsg = NULL; - tx->tx_niov = 0; - tx->tx_iov = NULL; - tx->tx_reqmdh = PTL_INVALID_HANDLE; - tx->tx_bulkmdh = PTL_INVALID_HANDLE; - tx->tx_msgsize = msgsize; - tx->tx_completing = 0; - tx->tx_status = 0; - - PTLLND_DBGT_INIT(tx->tx_bulk_posted); - PTLLND_DBGT_INIT(tx->tx_bulk_done); - PTLLND_DBGT_INIT(tx->tx_req_posted); - PTLLND_DBGT_INIT(tx->tx_req_done); - - if (msgsize != 0) { - tx->tx_msg.ptlm_magic = PTLLND_MSG_MAGIC; - tx->tx_msg.ptlm_version = PTLLND_MSG_VERSION; - tx->tx_msg.ptlm_type = type; - tx->tx_msg.ptlm_credits = 0; - tx->tx_msg.ptlm_nob = msgsize; - tx->tx_msg.ptlm_cksum = 0; - tx->tx_msg.ptlm_srcnid = ni->ni_nid; - 
tx->tx_msg.ptlm_srcstamp = plni->plni_stamp; - tx->tx_msg.ptlm_dstnid = peer->plp_id.nid; - tx->tx_msg.ptlm_dststamp = peer->plp_stamp; - tx->tx_msg.ptlm_srcpid = the_lnet.ln_pid; - tx->tx_msg.ptlm_dstpid = peer->plp_id.pid; - } - - ptllnd_peer_addref(peer); - plni->plni_ntxs++; - - CDEBUG(D_NET, "tx=%p\n",tx); - - return tx; -} - -void -ptllnd_abort_tx(ptllnd_tx_t *tx, ptl_handle_md_t *mdh) -{ - ptllnd_peer_t *peer = tx->tx_peer; - lnet_ni_t *ni = peer->plp_ni; - int rc; - time_t start = cfs_time_current_sec(); - int w = PTLLND_WARN_LONG_WAIT; - - while (!PtlHandleIsEqual(*mdh, PTL_INVALID_HANDLE)) { - rc = PtlMDUnlink(*mdh); -#ifndef LUSTRE_PORTALS_UNLINK_SEMANTICS - if (rc == PTL_OK) /* unlink successful => no unlinked event */ - return; - LASSERT (rc == PTL_MD_IN_USE); -#endif - if (cfs_time_current_sec() > start + w) { - CWARN("Waited %ds to abort tx to %s\n", - w, libcfs_id2str(peer->plp_id)); - w *= 2; - } - /* Wait for ptllnd_tx_event() to invalidate */ - ptllnd_wait(ni, w*1000); - } -} - -void -ptllnd_cull_tx_history(ptllnd_ni_t *plni) -{ - int max = plni->plni_max_tx_history; - - while (plni->plni_ntx_history > max) { - ptllnd_tx_t *tx = list_entry(plni->plni_tx_history.next, - ptllnd_tx_t, tx_list); - list_del(&tx->tx_list); - - ptllnd_peer_decref(tx->tx_peer); - - LIBCFS_FREE(tx, offsetof(ptllnd_tx_t, tx_msg) + tx->tx_msgsize); - - LASSERT (plni->plni_ntxs > 0); - plni->plni_ntxs--; - plni->plni_ntx_history--; - } -} - -void -ptllnd_tx_done(ptllnd_tx_t *tx) -{ - ptllnd_peer_t *peer = tx->tx_peer; - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - - /* CAVEAT EMPTOR: If this tx is being aborted, I'll continue to get - * events for this tx until it's unlinked. 
So I set tx_completing to - * flag the tx is getting handled */ - - if (tx->tx_completing) - return; - - tx->tx_completing = 1; - - if (!list_empty(&tx->tx_list)) - list_del_init(&tx->tx_list); - - if (tx->tx_status != 0) { - CERROR("Completing tx with error\n"); - ptllnd_debug_tx(tx); - ptllnd_close_peer(peer, tx->tx_status); - } - - ptllnd_abort_tx(tx, &tx->tx_reqmdh); - ptllnd_abort_tx(tx, &tx->tx_bulkmdh); - - if (tx->tx_niov > 0) { - LIBCFS_FREE(tx->tx_iov, tx->tx_niov * sizeof(*tx->tx_iov)); - tx->tx_niov = 0; - } - - if (tx->tx_lnetreplymsg != NULL) { - LASSERT (tx->tx_type == PTLLND_MSG_TYPE_GET); - LASSERT (tx->tx_lnetmsg != NULL); - /* Simulate GET success always */ - lnet_finalize(ni, tx->tx_lnetmsg, 0); - CDEBUG(D_NET, "lnet_finalize(tx_lnetreplymsg=%p)\n",tx->tx_lnetreplymsg); - lnet_finalize(ni, tx->tx_lnetreplymsg, tx->tx_status); - } else if (tx->tx_lnetmsg != NULL) { - lnet_finalize(ni, tx->tx_lnetmsg, tx->tx_status); - } - - plni->plni_ntx_history++; - list_add_tail(&tx->tx_list, &plni->plni_tx_history); - - ptllnd_cull_tx_history(plni); -} - -int -ptllnd_set_txiov(ptllnd_tx_t *tx, - unsigned int niov, struct iovec *iov, - unsigned int offset, unsigned int len) -{ - ptl_md_iovec_t *piov; - int npiov; - - if (len == 0) { - tx->tx_niov = 0; - return 0; - } - - /* - * Remove iovec's at the beginning that - * are skipped because of the offset. 
- * Adjust the offset accordingly - */ - for (;;) { - LASSERT (niov > 0); - if (offset < iov->iov_len) - break; - offset -= iov->iov_len; - niov--; - iov++; - } - - for (;;) { - int temp_offset = offset; - int resid = len; - LIBCFS_ALLOC(piov, niov * sizeof(*piov)); - if (piov == NULL) - return -ENOMEM; - - for (npiov = 0;; npiov++) { - LASSERT (npiov < niov); - LASSERT (iov->iov_len >= temp_offset); - - piov[npiov].iov_base = iov[npiov].iov_base + temp_offset; - piov[npiov].iov_len = iov[npiov].iov_len - temp_offset; - - if (piov[npiov].iov_len >= resid) { - piov[npiov].iov_len = resid; - npiov++; - break; - } - resid -= piov[npiov].iov_len; - temp_offset = 0; - } - - if (npiov == niov) { - tx->tx_niov = niov; - tx->tx_iov = piov; - return 0; - } - - /* Dang! The piov I allocated was too big and it's a drag to - * have to maintain separate 'allocated' and 'used' sizes, so - * I'll just do it again; NB this doesn't happen normally... */ - LIBCFS_FREE(piov, niov * sizeof(*piov)); - niov = npiov; - } -} - -void -ptllnd_set_md_buffer(ptl_md_t *md, ptllnd_tx_t *tx) -{ - unsigned int niov = tx->tx_niov; - ptl_md_iovec_t *iov = tx->tx_iov; - - LASSERT ((md->options & PTL_MD_IOVEC) == 0); - - if (niov == 0) { - md->start = NULL; - md->length = 0; - } else if (niov == 1) { - md->start = iov[0].iov_base; - md->length = iov[0].iov_len; - } else { - md->start = iov; - md->length = niov; - md->options |= PTL_MD_IOVEC; - } -} - -int -ptllnd_post_buffer(ptllnd_buffer_t *buf) -{ - lnet_ni_t *ni = buf->plb_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptl_process_id_t anyid = { - .nid = PTL_NID_ANY, - .pid = PTL_PID_ANY}; - ptl_md_t md = { - .start = buf->plb_buffer, - .length = plni->plni_buffer_size, - .threshold = PTL_MD_THRESH_INF, - .max_size = plni->plni_max_msg_size, - .options = (PTLLND_MD_OPTIONS | - PTL_MD_OP_PUT | PTL_MD_MAX_SIZE | - PTL_MD_LOCAL_ALIGN8), - .user_ptr = ptllnd_obj2eventarg(buf, PTLLND_EVENTARG_TYPE_BUF), - .eq_handle = plni->plni_eqh}; - ptl_handle_me_t meh; 
- int rc; - - LASSERT (!buf->plb_posted); - - rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, - anyid, LNET_MSG_MATCHBITS, 0, - PTL_UNLINK, PTL_INS_AFTER, &meh); - if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %d\n", rc); - return -ENOMEM; - } - - buf->plb_posted = 1; - plni->plni_nposted_buffers++; - - rc = PtlMDAttach(meh, md, LNET_UNLINK, &buf->plb_md); - if (rc == PTL_OK) - return 0; - - CERROR("PtlMDAttach failed: %d\n", rc); - - buf->plb_posted = 0; - plni->plni_nposted_buffers--; - - rc = PtlMEUnlink(meh); - LASSERT (rc == PTL_OK); - - return -ENOMEM; -} - -void -ptllnd_check_sends(ptllnd_peer_t *peer) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx; - ptl_md_t md; - ptl_handle_md_t mdh; - int rc; - - CDEBUG(D_NET, "%s: [%d/%d+%d(%d)\n", - libcfs_id2str(peer->plp_id), peer->plp_credits, - peer->plp_outstanding_credits, peer->plp_sent_credits, - plni->plni_peer_credits + peer->plp_lazy_credits); - - if (list_empty(&peer->plp_txq) && - peer->plp_outstanding_credits >= PTLLND_CREDIT_HIGHWATER(plni) && - peer->plp_credits != 0) { - - tx = ptllnd_new_tx(peer, PTLLND_MSG_TYPE_NOOP, 0); - CDEBUG(D_NET, "NOOP tx=%p\n",tx); - if (tx == NULL) { - CERROR("Can't return credits to %s\n", - libcfs_id2str(peer->plp_id)); - } else { - list_add_tail(&tx->tx_list, &peer->plp_txq); - } - } - - while (!list_empty(&peer->plp_txq)) { - tx = list_entry(peer->plp_txq.next, ptllnd_tx_t, tx_list); - - LASSERT (tx->tx_msgsize > 0); - - LASSERT (peer->plp_outstanding_credits >= 0); - LASSERT (peer->plp_sent_credits >= 0); - LASSERT (peer->plp_outstanding_credits + peer->plp_sent_credits - <= plni->plni_peer_credits + peer->plp_lazy_credits); - LASSERT (peer->plp_credits >= 0); - - if (peer->plp_credits == 0) { /* no credits */ - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: no creds for %p", - libcfs_id2str(peer->plp_id), - peer->plp_credits, - peer->plp_outstanding_credits, - peer->plp_sent_credits, - plni->plni_peer_credits + - 
peer->plp_lazy_credits, tx); - break; - } - - if (peer->plp_credits == 1 && /* last credit reserved for */ - peer->plp_outstanding_credits == 0) { /* returning credits */ - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: too few creds for %p", - libcfs_id2str(peer->plp_id), - peer->plp_credits, - peer->plp_outstanding_credits, - peer->plp_sent_credits, - plni->plni_peer_credits + - peer->plp_lazy_credits, tx); - break; - } - - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &peer->plp_activeq); - - CDEBUG(D_NET, "Sending at TX=%p type=%s (%d)\n",tx, - ptllnd_msgtype2str(tx->tx_type),tx->tx_type); - - if (tx->tx_type == PTLLND_MSG_TYPE_NOOP && - (!list_empty(&peer->plp_txq) || - peer->plp_outstanding_credits < - PTLLND_CREDIT_HIGHWATER(plni))) { - /* redundant NOOP */ - ptllnd_tx_done(tx); - continue; - } - - /* Set stamp at the last minute; on a new peer, I don't know it - * until I receive the HELLO back */ - tx->tx_msg.ptlm_dststamp = peer->plp_stamp; - - /* - * Return all the credits we have - */ - tx->tx_msg.ptlm_credits = peer->plp_outstanding_credits; - peer->plp_sent_credits += peer->plp_outstanding_credits; - peer->plp_outstanding_credits = 0; - - /* - * One less credit - */ - peer->plp_credits--; - - if (plni->plni_checksum) - tx->tx_msg.ptlm_cksum = - ptllnd_cksum(&tx->tx_msg, - offsetof(kptl_msg_t, ptlm_u)); - - md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX); - md.eq_handle = plni->plni_eqh; - md.threshold = 1; - md.options = PTLLND_MD_OPTIONS; - md.start = &tx->tx_msg; - md.length = tx->tx_msgsize; - - rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh); - if (rc != PTL_OK) { - CERROR("PtlMDBind for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); - tx->tx_status = -EIO; - ptllnd_tx_done(tx); - break; - } - - LASSERT (tx->tx_type != PTLLND_RDMA_WRITE && - tx->tx_type != PTLLND_RDMA_READ); - - tx->tx_reqmdh = mdh; - PTLLND_DBGT_STAMP(tx->tx_req_posted); - - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: %s %p c %d", - libcfs_id2str(peer->plp_id), - 
peer->plp_credits, - peer->plp_outstanding_credits, - peer->plp_sent_credits, - plni->plni_peer_credits + - peer->plp_lazy_credits, - ptllnd_msgtype2str(tx->tx_type), tx, - tx->tx_msg.ptlm_credits); - - rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid, - plni->plni_portal, 0, LNET_MSG_MATCHBITS, 0, 0); - if (rc != PTL_OK) { - CERROR("PtlPut for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); - tx->tx_status = -EIO; - ptllnd_tx_done(tx); - break; - } - } -} - -int -ptllnd_passive_rdma(ptllnd_peer_t *peer, int type, lnet_msg_t *msg, - unsigned int niov, struct iovec *iov, - unsigned int offset, unsigned int len) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx = ptllnd_new_tx(peer, type, 0); - __u64 matchbits; - ptl_md_t md; - ptl_handle_md_t mdh; - ptl_handle_me_t meh; - int rc; - int rc2; - time_t start; - int w; - - CDEBUG(D_NET, "niov=%d offset=%d len=%d\n",niov,offset,len); - - LASSERT (type == PTLLND_MSG_TYPE_GET || - type == PTLLND_MSG_TYPE_PUT); - - if (tx == NULL) { - CERROR("Can't allocate %s tx for %s\n", - type == PTLLND_MSG_TYPE_GET ? 
"GET" : "PUT/REPLY", - libcfs_id2str(peer->plp_id)); - return -ENOMEM; - } - - rc = ptllnd_set_txiov(tx, niov, iov, offset, len); - if (rc != 0) { - CERROR ("Can't allocate iov %d for %s\n", - niov, libcfs_id2str(peer->plp_id)); - rc = -ENOMEM; - goto failed; - } - - md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX); - md.eq_handle = plni->plni_eqh; - md.threshold = 1; - md.max_size = 0; - md.options = PTLLND_MD_OPTIONS; - if(type == PTLLND_MSG_TYPE_GET) - md.options |= PTL_MD_OP_PUT | PTL_MD_ACK_DISABLE; - else - md.options |= PTL_MD_OP_GET; - ptllnd_set_md_buffer(&md, tx); - - start = cfs_time_current_sec(); - w = PTLLND_WARN_LONG_WAIT; - - while (!peer->plp_recvd_hello) { /* wait to validate plp_match */ - if (peer->plp_closing) { - rc = -EIO; - goto failed; - } - if (cfs_time_current_sec() > start + w) { - CWARN("Waited %ds to connect to %s\n", - w, libcfs_id2str(peer->plp_id)); - w *= 2; - } - ptllnd_wait(ni, w*1000); - } - - if (peer->plp_match < PTL_RESERVED_MATCHBITS) - peer->plp_match = PTL_RESERVED_MATCHBITS; - matchbits = peer->plp_match++; - CDEBUG(D_NET, "matchbits " LPX64 " %s\n", matchbits, - ptllnd_ptlid2str(peer->plp_ptlid)); - - rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, peer->plp_ptlid, - matchbits, 0, PTL_UNLINK, PTL_INS_BEFORE, &meh); - if (rc != PTL_OK) { - CERROR("PtlMEAttach for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); - rc = -EIO; - goto failed; - } - - PTLLND_DBGT_STAMP(tx->tx_bulk_posted); - - rc = PtlMDAttach(meh, md, LNET_UNLINK, &mdh); - if (rc != PTL_OK) { - CERROR("PtlMDAttach for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); - rc2 = PtlMEUnlink(meh); - LASSERT (rc2 == PTL_OK); - rc = -EIO; - goto failed; - } - tx->tx_bulkmdh = mdh; - - /* - * We need to set the stamp here because it - * we could have received a HELLO above that set - * peer->plp_stamp - */ - tx->tx_msg.ptlm_dststamp = peer->plp_stamp; - - tx->tx_msg.ptlm_u.rdma.kptlrm_hdr = msg->msg_hdr; - 
tx->tx_msg.ptlm_u.rdma.kptlrm_matchbits = matchbits; - - if (type == PTLLND_MSG_TYPE_GET) { - tx->tx_lnetreplymsg = lnet_create_reply_msg(ni, msg); - if (tx->tx_lnetreplymsg == NULL) { - CERROR("Can't create reply for GET to %s\n", - libcfs_id2str(msg->msg_target)); - rc = -ENOMEM; - goto failed; - } - } - - tx->tx_lnetmsg = msg; - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post passive %s p %d %p", - libcfs_id2str(msg->msg_target), - peer->plp_credits, peer->plp_outstanding_credits, - peer->plp_sent_credits, - plni->plni_peer_credits + peer->plp_lazy_credits, - lnet_msgtyp2str(msg->msg_type), - (le32_to_cpu(msg->msg_type) == LNET_MSG_PUT) ? - le32_to_cpu(msg->msg_hdr.msg.put.ptl_index) : - (le32_to_cpu(msg->msg_type) == LNET_MSG_GET) ? - le32_to_cpu(msg->msg_hdr.msg.get.ptl_index) : -1, - tx); - ptllnd_post_tx(tx); - return 0; - - failed: - ptllnd_tx_done(tx); - return rc; -} - -int -ptllnd_active_rdma(ptllnd_peer_t *peer, int type, - lnet_msg_t *msg, __u64 matchbits, - unsigned int niov, struct iovec *iov, - unsigned int offset, unsigned int len) -{ - lnet_ni_t *ni = peer->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx = ptllnd_new_tx(peer, type, 0); - ptl_md_t md; - ptl_handle_md_t mdh; - int rc; - - LASSERT (type == PTLLND_RDMA_READ || - type == PTLLND_RDMA_WRITE); - - if (tx == NULL) { - CERROR("Can't allocate tx for RDMA %s with %s\n", - (type == PTLLND_RDMA_WRITE) ? "write" : "read", - libcfs_id2str(peer->plp_id)); - ptllnd_close_peer(peer, -ENOMEM); - return -ENOMEM; - } - - rc = ptllnd_set_txiov(tx, niov, iov, offset, len); - if (rc != 0) { - CERROR ("Can't allocate iov %d for %s\n", - niov, libcfs_id2str(peer->plp_id)); - rc = -ENOMEM; - goto failed; - } - - md.user_ptr = ptllnd_obj2eventarg(tx, PTLLND_EVENTARG_TYPE_TX); - md.eq_handle = plni->plni_eqh; - md.max_size = 0; - md.options = PTLLND_MD_OPTIONS; - md.threshold = (type == PTLLND_RDMA_READ) ? 
2 : 1; - - ptllnd_set_md_buffer(&md, tx); - - rc = PtlMDBind(plni->plni_nih, md, LNET_UNLINK, &mdh); - if (rc != PTL_OK) { - CERROR("PtlMDBind for %s failed: %d\n", - libcfs_id2str(peer->plp_id), rc); - rc = -EIO; - goto failed; - } - - tx->tx_bulkmdh = mdh; - tx->tx_lnetmsg = msg; - - list_add_tail(&tx->tx_list, &peer->plp_activeq); - PTLLND_DBGT_STAMP(tx->tx_bulk_posted); - - if (type == PTLLND_RDMA_READ) - rc = PtlGet(mdh, peer->plp_ptlid, - plni->plni_portal, 0, matchbits, 0); - else - rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid, - plni->plni_portal, 0, matchbits, 0, - (msg == NULL) ? PTLLND_RDMA_FAIL : PTLLND_RDMA_OK); - - if (rc == PTL_OK) - return 0; - - CERROR("Can't initiate RDMA with %s: %d\n", - libcfs_id2str(peer->plp_id), rc); - - tx->tx_lnetmsg = NULL; - failed: - tx->tx_status = rc; - ptllnd_tx_done(tx); /* this will close peer */ - return rc; -} - -int -ptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *msg) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_peer_t *plp; - ptllnd_tx_t *tx; - int nob; - int rc; - - LASSERT (!msg->msg_routing); - LASSERT (msg->msg_kiov == NULL); - - LASSERT (msg->msg_niov <= PTL_MD_MAX_IOV); /* !!! */ - - CDEBUG(D_NET, "%s [%d]+%d,%d -> %s%s\n", - lnet_msgtyp2str(msg->msg_type), - msg->msg_niov, msg->msg_offset, msg->msg_len, - libcfs_nid2str(msg->msg_target.nid), - msg->msg_target_is_router ? 
"(rtr)" : ""); - - if ((msg->msg_target.pid & LNET_PID_USERFLAG) != 0) { - CERROR("Can't send to non-kernel peer %s\n", - libcfs_id2str(msg->msg_target)); - return -EHOSTUNREACH; - } - - plp = ptllnd_find_peer(ni, msg->msg_target, 1); - if (plp == NULL) - return -ENOMEM; - - switch (msg->msg_type) { - default: - LBUG(); - - case LNET_MSG_ACK: - LASSERT (msg->msg_len == 0); - break; /* send IMMEDIATE */ - - case LNET_MSG_GET: - if (msg->msg_target_is_router) - break; /* send IMMEDIATE */ - - nob = msg->msg_md->md_length; - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[nob]); - if (nob <= plni->plni_max_msg_size) - break; - - LASSERT ((msg->msg_md->md_options & LNET_MD_KIOV) == 0); - rc = ptllnd_passive_rdma(plp, PTLLND_MSG_TYPE_GET, msg, - msg->msg_md->md_niov, - msg->msg_md->md_iov.iov, - 0, msg->msg_md->md_length); - ptllnd_peer_decref(plp); - return rc; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - nob = msg->msg_len; - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[nob]); - if (nob <= plp->plp_max_msg_size) - break; /* send IMMEDIATE */ - - rc = ptllnd_passive_rdma(plp, PTLLND_MSG_TYPE_PUT, msg, - msg->msg_niov, msg->msg_iov, - msg->msg_offset, msg->msg_len); - ptllnd_peer_decref(plp); - return rc; - } - - /* send IMMEDIATE - * NB copy the payload so we don't have to do a fragmented send */ - - tx = ptllnd_new_tx(plp, PTLLND_MSG_TYPE_IMMEDIATE, msg->msg_len); - if (tx == NULL) { - CERROR("Can't allocate tx for lnet type %d to %s\n", - msg->msg_type, libcfs_id2str(msg->msg_target)); - ptllnd_peer_decref(plp); - return -ENOMEM; - } - - lnet_copy_iov2flat(tx->tx_msgsize, &tx->tx_msg, - offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload), - msg->msg_niov, msg->msg_iov, msg->msg_offset, - msg->msg_len); - tx->tx_msg.ptlm_u.immediate.kptlim_hdr = msg->msg_hdr; - - tx->tx_lnetmsg = msg; - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: post immediate %s p %d %p", - libcfs_id2str(msg->msg_target), - plp->plp_credits, plp->plp_outstanding_credits, - 
plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits, - lnet_msgtyp2str(msg->msg_type), - (le32_to_cpu(msg->msg_type) == LNET_MSG_PUT) ? - le32_to_cpu(msg->msg_hdr.msg.put.ptl_index) : - (le32_to_cpu(msg->msg_type) == LNET_MSG_GET) ? - le32_to_cpu(msg->msg_hdr.msg.get.ptl_index) : -1, - tx); - ptllnd_post_tx(tx); - ptllnd_peer_decref(plp); - return 0; -} - -void -ptllnd_rx_done(ptllnd_rx_t *rx) -{ - ptllnd_peer_t *plp = rx->rx_peer; - lnet_ni_t *ni = plp->plp_ni; - ptllnd_ni_t *plni = ni->ni_data; - - plp->plp_outstanding_credits++; - - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: rx=%p done\n", - libcfs_id2str(plp->plp_id), - plp->plp_credits, plp->plp_outstanding_credits, - plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits, rx); - - ptllnd_check_sends(rx->rx_peer); - - LASSERT (plni->plni_nrxs > 0); - plni->plni_nrxs--; -} - -int -ptllnd_eager_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, - void **new_privatep) -{ - /* Shouldn't get here; recvs only block for router buffers */ - LBUG(); - return 0; -} - -int -ptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *msg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - ptllnd_rx_t *rx = private; - int rc = 0; - int nob; - - LASSERT (kiov == NULL); - LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! 
*/ - - switch (rx->rx_msg->ptlm_type) { - default: - LBUG(); - - case PTLLND_MSG_TYPE_IMMEDIATE: - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[mlen]); - if (nob > rx->rx_nob) { - CERROR("Immediate message from %s too big: %d(%d)\n", - libcfs_id2str(rx->rx_peer->plp_id), - nob, rx->rx_nob); - rc = -EPROTO; - break; - } - lnet_copy_flat2iov(niov, iov, offset, - rx->rx_nob, rx->rx_msg, - offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload), - mlen); - lnet_finalize(ni, msg, 0); - break; - - case PTLLND_MSG_TYPE_PUT: - rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_READ, msg, - rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits, - niov, iov, offset, mlen); - break; - - case PTLLND_MSG_TYPE_GET: - if (msg != NULL) - rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_WRITE, msg, - rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits, - msg->msg_niov, msg->msg_iov, - msg->msg_offset, msg->msg_len); - else - rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_WRITE, NULL, - rx->rx_msg->ptlm_u.rdma.kptlrm_matchbits, - 0, NULL, 0, 0); - break; - } - - ptllnd_rx_done(rx); - return rc; -} - -void -ptllnd_abort_on_nak(lnet_ni_t *ni) -{ - ptllnd_ni_t *plni = ni->ni_data; - - if (plni->plni_dump_on_nak) - ptllnd_dump_history(); - - if (plni->plni_abort_on_nak) - abort(); -} - -void -ptllnd_parse_request(lnet_ni_t *ni, ptl_process_id_t initiator, - kptl_msg_t *msg, unsigned int nob) -{ - ptllnd_ni_t *plni = ni->ni_data; - const int basenob = offsetof(kptl_msg_t, ptlm_u); - lnet_process_id_t srcid; - ptllnd_rx_t rx; - int flip; - __u16 msg_version; - __u32 msg_cksum; - ptllnd_peer_t *plp; - int rc; - - if (nob < 6) { - CERROR("Very short receive from %s\n", - ptllnd_ptlid2str(initiator)); - return; - } - - /* I can at least read MAGIC/VERSION */ - - flip = msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC); - if (!flip && msg->ptlm_magic != PTLLND_MSG_MAGIC) { - CERROR("Bad protocol magic %08x from %s\n", - msg->ptlm_magic, ptllnd_ptlid2str(initiator)); - return; - } - - msg_version = flip ? 
__swab16(msg->ptlm_version) : msg->ptlm_version; - - if (msg_version != PTLLND_MSG_VERSION) { - CERROR("Bad protocol version %04x from %s\n", - (__u32)msg_version, ptllnd_ptlid2str(initiator)); - ptllnd_abort_on_nak(ni); - return; - } - - if (nob < basenob) { - CERROR("Short receive from %s: got %d, wanted at least %d\n", - ptllnd_ptlid2str(initiator), nob, basenob); - return; - } - - /* checksum must be computed with - * 1) ptlm_cksum zero and - * 2) BEFORE anything gets modified/flipped - */ - msg_cksum = flip ? __swab32(msg->ptlm_cksum) : msg->ptlm_cksum; - msg->ptlm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != ptllnd_cksum(msg, offsetof(kptl_msg_t, ptlm_u))) { - CERROR("Bad checksum from %s\n", ptllnd_ptlid2str(initiator)); - return; - } - - msg->ptlm_version = msg_version; - msg->ptlm_cksum = msg_cksum; - - if (flip) { - /* NB stamps are opaque cookies */ - __swab32s(&msg->ptlm_nob); - __swab64s(&msg->ptlm_srcnid); - __swab64s(&msg->ptlm_dstnid); - __swab32s(&msg->ptlm_srcpid); - __swab32s(&msg->ptlm_dstpid); - } - - srcid.nid = msg->ptlm_srcnid; - srcid.pid = msg->ptlm_srcpid; - - if (LNET_NIDNET(msg->ptlm_srcnid) != LNET_NIDNET(ni->ni_nid)) { - CERROR("Bad source id %s from %s\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) { - CERROR("NAK from %s (%s)\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - ptllnd_abort_on_nak(ni); - return; - } - - if (msg->ptlm_dstnid != ni->ni_nid || - msg->ptlm_dstpid != the_lnet.ln_pid) { - CERROR("Bad dstid %s (%s expected) from %s\n", - libcfs_id2str((lnet_process_id_t) { - .nid = msg->ptlm_dstnid, - .pid = msg->ptlm_dstpid}), - libcfs_id2str((lnet_process_id_t) { - .nid = ni->ni_nid, - .pid = the_lnet.ln_pid}), - libcfs_id2str(srcid)); - return; - } - - if (msg->ptlm_dststamp != plni->plni_stamp) { - CERROR("Bad dststamp "LPX64"("LPX64" expected) from %s\n", - msg->ptlm_dststamp, plni->plni_stamp, - libcfs_id2str(srcid)); - return; - 
} - - PTLLND_HISTORY("RX %s: %s %d %p", libcfs_id2str(srcid), - ptllnd_msgtype2str(msg->ptlm_type), - msg->ptlm_credits, &rx); - - switch (msg->ptlm_type) { - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - if (nob < basenob + sizeof(kptl_rdma_msg_t)) { - CERROR("Short rdma request from %s(%s)\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - if (flip) - __swab64s(&msg->ptlm_u.rdma.kptlrm_matchbits); - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - if (nob < offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload)) { - CERROR("Short immediate from %s(%s)\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - break; - - case PTLLND_MSG_TYPE_HELLO: - if (nob < basenob + sizeof(kptl_hello_msg_t)) { - CERROR("Short hello from %s(%s)\n", - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - if(flip){ - __swab64s(&msg->ptlm_u.hello.kptlhm_matchbits); - __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size); - } - break; - - case PTLLND_MSG_TYPE_NOOP: - break; - - default: - CERROR("Bad message type %d from %s(%s)\n", msg->ptlm_type, - libcfs_id2str(srcid), - ptllnd_ptlid2str(initiator)); - return; - } - - plp = ptllnd_find_peer(ni, srcid, 0); - if (plp == NULL) { - CERROR("Can't find peer %s\n", libcfs_id2str(srcid)); - return; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) { - if (plp->plp_recvd_hello) { - CERROR("Unexpected HELLO from %s\n", - libcfs_id2str(srcid)); - ptllnd_peer_decref(plp); - return; - } - - plp->plp_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size; - plp->plp_match = msg->ptlm_u.hello.kptlhm_matchbits; - plp->plp_stamp = msg->ptlm_srcstamp; - plp->plp_recvd_hello = 1; - - } else if (!plp->plp_recvd_hello) { - - CERROR("Bad message type %d (HELLO expected) from %s\n", - msg->ptlm_type, libcfs_id2str(srcid)); - ptllnd_peer_decref(plp); - return; - - } else if (msg->ptlm_srcstamp != plp->plp_stamp) { - - CERROR("Bad srcstamp "LPX64"("LPX64" expected) from %s\n", - 
msg->ptlm_srcstamp, plp->plp_stamp, - libcfs_id2str(srcid)); - ptllnd_peer_decref(plp); - return; - } - - /* Check peer only sends when I've sent her credits */ - if (plp->plp_sent_credits == 0) { - CERROR("%s[%d/%d+%d(%d)]: unexpected message\n", - libcfs_id2str(plp->plp_id), - plp->plp_credits, plp->plp_outstanding_credits, - plp->plp_sent_credits, - plni->plni_peer_credits + plp->plp_lazy_credits); - return; - } - plp->plp_sent_credits--; - - /* No check for credit overflow - the peer may post new buffers after - * the startup handshake. */ - if (msg->ptlm_credits > 0) { - plp->plp_credits += msg->ptlm_credits; - ptllnd_check_sends(plp); - } - - /* All OK so far; assume the message is good... */ - - rx.rx_peer = plp; - rx.rx_msg = msg; - rx.rx_nob = nob; - plni->plni_nrxs++; - - switch (msg->ptlm_type) { - default: /* message types have been checked already */ - ptllnd_rx_done(&rx); - break; - - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - rc = lnet_parse(ni, &msg->ptlm_u.rdma.kptlrm_hdr, - msg->ptlm_srcnid, &rx, 1); - if (rc < 0) - ptllnd_rx_done(&rx); - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - rc = lnet_parse(ni, &msg->ptlm_u.immediate.kptlim_hdr, - msg->ptlm_srcnid, &rx, 0); - if (rc < 0) - ptllnd_rx_done(&rx); - break; - } - - ptllnd_peer_decref(plp); -} - -void -ptllnd_buf_event (lnet_ni_t *ni, ptl_event_t *event) -{ - ptllnd_buffer_t *buf = ptllnd_eventarg2obj(event->md.user_ptr); - ptllnd_ni_t *plni = ni->ni_data; - char *msg = &buf->plb_buffer[event->offset]; - int repost; - int unlinked = event->type == PTL_EVENT_UNLINK; - - LASSERT (buf->plb_ni == ni); - LASSERT (event->type == PTL_EVENT_PUT_END || - event->type == PTL_EVENT_UNLINK); - - if (event->ni_fail_type != PTL_NI_OK) { - - CERROR("event type %s(%d), status %s(%d) from %s\n", - ptllnd_evtype2str(event->type), event->type, - ptllnd_errtype2str(event->ni_fail_type), - event->ni_fail_type, - ptllnd_ptlid2str(event->initiator)); - - } else if (event->type == PTL_EVENT_PUT_END) { -#if 
(PTL_MD_LOCAL_ALIGN8 == 0) - /* Portals can't force message alignment - someone sending an - * odd-length message could misalign subsequent messages */ - if ((event->mlength & 7) != 0) { - CERROR("Message from %s has odd length %llu: " - "probable version incompatibility\n", - ptllnd_ptlid2str(event->initiator), - event->mlength); - LBUG(); - } -#endif - LASSERT ((event->offset & 7) == 0); - - ptllnd_parse_request(ni, event->initiator, - (kptl_msg_t *)msg, event->mlength); - } - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - /* UNLINK event only on explicit unlink */ - repost = (event->unlinked && event->type != PTL_EVENT_UNLINK); - if (event->unlinked) - unlinked = 1; -#else - /* UNLINK event only on implicit unlink */ - repost = (event->type == PTL_EVENT_UNLINK); -#endif - - if (unlinked) { - LASSERT(buf->plb_posted); - buf->plb_posted = 0; - plni->plni_nposted_buffers--; - } - - if (repost) - (void) ptllnd_post_buffer(buf); -} - -void -ptllnd_tx_event (lnet_ni_t *ni, ptl_event_t *event) -{ - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx = ptllnd_eventarg2obj(event->md.user_ptr); - int error = (event->ni_fail_type != PTL_NI_OK); - int isreq; - int isbulk; -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - int unlinked = event->unlinked; -#else - int unlinked = (event->type == PTL_EVENT_UNLINK); -#endif - - if (error) - CERROR("Error %s(%d) event %s(%d) unlinked %d, %s(%d) for %s\n", - ptllnd_errtype2str(event->ni_fail_type), - event->ni_fail_type, - ptllnd_evtype2str(event->type), event->type, - unlinked, ptllnd_msgtype2str(tx->tx_type), tx->tx_type, - libcfs_id2str(tx->tx_peer->plp_id)); - - LASSERT (!PtlHandleIsEqual(event->md_handle, PTL_INVALID_HANDLE)); - - isreq = PtlHandleIsEqual(event->md_handle, tx->tx_reqmdh); - if (isreq) { - LASSERT (event->md.start == (void *)&tx->tx_msg); - if (unlinked) { - tx->tx_reqmdh = PTL_INVALID_HANDLE; - PTLLND_DBGT_STAMP(tx->tx_req_done); - } - } - - isbulk = PtlHandleIsEqual(event->md_handle, tx->tx_bulkmdh); - if ( isbulk && 
unlinked ) { - tx->tx_bulkmdh = PTL_INVALID_HANDLE; - PTLLND_DBGT_STAMP(tx->tx_bulk_done); - } - - LASSERT (!isreq != !isbulk); /* always one and only 1 match */ - - PTLLND_HISTORY("%s[%d/%d+%d(%d)]: TX done %p %s%s", - libcfs_id2str(tx->tx_peer->plp_id), - tx->tx_peer->plp_credits, - tx->tx_peer->plp_outstanding_credits, - tx->tx_peer->plp_sent_credits, - plni->plni_peer_credits + tx->tx_peer->plp_lazy_credits, - tx, isreq ? "REQ" : "BULK", unlinked ? "(unlinked)" : ""); - - LASSERT (!isreq != !isbulk); /* always one and only 1 match */ - switch (tx->tx_type) { - default: - LBUG(); - - case PTLLND_MSG_TYPE_NOOP: - case PTLLND_MSG_TYPE_HELLO: - case PTLLND_MSG_TYPE_IMMEDIATE: - LASSERT (event->type == PTL_EVENT_UNLINK || - event->type == PTL_EVENT_SEND_END); - LASSERT (isreq); - break; - - case PTLLND_MSG_TYPE_GET: - LASSERT (event->type == PTL_EVENT_UNLINK || - (isreq && event->type == PTL_EVENT_SEND_END) || - (isbulk && event->type == PTL_EVENT_PUT_END)); - - if (isbulk && !error && event->type == PTL_EVENT_PUT_END) { - /* Check GET matched */ - if (event->hdr_data == PTLLND_RDMA_OK) { - lnet_set_reply_msg_len(ni, - tx->tx_lnetreplymsg, - event->mlength); - } else { - CERROR ("Unmatched GET with %s\n", - libcfs_id2str(tx->tx_peer->plp_id)); - tx->tx_status = -EIO; - } - } - break; - - case PTLLND_MSG_TYPE_PUT: - LASSERT (event->type == PTL_EVENT_UNLINK || - (isreq && event->type == PTL_EVENT_SEND_END) || - (isbulk && event->type == PTL_EVENT_GET_END)); - break; - - case PTLLND_RDMA_READ: - LASSERT (event->type == PTL_EVENT_UNLINK || - event->type == PTL_EVENT_SEND_END || - event->type == PTL_EVENT_REPLY_END); - LASSERT (isbulk); - break; - - case PTLLND_RDMA_WRITE: - LASSERT (event->type == PTL_EVENT_UNLINK || - event->type == PTL_EVENT_SEND_END); - LASSERT (isbulk); - } - - /* Schedule ptllnd_tx_done() on error or last completion event */ - if (error || - (PtlHandleIsEqual(tx->tx_bulkmdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(tx->tx_reqmdh, 
PTL_INVALID_HANDLE))) { - if (error) - tx->tx_status = -EIO; - list_del(&tx->tx_list); - list_add_tail(&tx->tx_list, &plni->plni_zombie_txs); - } -} - -void -ptllnd_wait (lnet_ni_t *ni, int milliseconds) -{ - static struct timeval prevt; - static int prevt_count; - static int call_count; - - struct timeval t1; - struct timeval t2; - - ptllnd_ni_t *plni = ni->ni_data; - ptllnd_tx_t *tx; - ptl_event_t event; - int which; - int rc; - int blocked = 0; - int found = 0; - int timeout = 0; - - /* Handle any currently queued events, returning immediately if any. - * Otherwise block for the timeout and handle all events queued - * then. */ - - gettimeofday(&t1, NULL); - call_count++; - - for (;;) { - time_t then = cfs_time_current_sec(); - - rc = PtlEQPoll(&plni->plni_eqh, 1, - (timeout < 0) ? PTL_TIME_FOREVER : timeout, - &event, &which); - - if (timeout >= 0 && - (cfs_time_current_sec() - then)*1000 > timeout + 1000) { - /* 1000 mS grace.............................^ */ - CERROR("SLOW PtlEQPoll(%d): %d seconds\n", timeout, - (int)(cfs_time_current_sec() - then)); - } - - timeout = 0; - - if (rc == PTL_EQ_EMPTY) { - if (found || /* handled some events */ - milliseconds == 0 || /* just checking */ - blocked) /* blocked already */ - break; - - blocked = 1; - timeout = (milliseconds < 0) ? 
- PTL_TIME_FOREVER : milliseconds; - continue; - } - - LASSERT (rc == PTL_OK || rc == PTL_EQ_DROPPED); - - if (rc == PTL_EQ_DROPPED) - CERROR("Event queue: size %d is too small\n", - plni->plni_eq_size); - - found = 1; - switch (ptllnd_eventarg2type(event.md.user_ptr)) { - default: - LBUG(); - - case PTLLND_EVENTARG_TYPE_TX: - ptllnd_tx_event(ni, &event); - break; - - case PTLLND_EVENTARG_TYPE_BUF: - ptllnd_buf_event(ni, &event); - break; - } - } - - while (!list_empty(&plni->plni_zombie_txs)) { - tx = list_entry(plni->plni_zombie_txs.next, - ptllnd_tx_t, tx_list); - ptllnd_tx_done(tx); - } - - gettimeofday(&t2, NULL); - - if (prevt.tv_sec == 0 || - prevt.tv_sec != t2.tv_sec) { - PTLLND_HISTORY("%d wait entered at %d.%06d - prev %d %d.%06d", - call_count, (int)t1.tv_sec, (int)t1.tv_usec, - prevt_count, (int)prevt.tv_sec, (int)prevt.tv_usec); - prevt = t2; - } -} diff --git a/lnet/ulnds/socklnd/.cvsignore b/lnet/ulnds/socklnd/.cvsignore deleted file mode 100644 index e9955884756af11fe171e89bf99e459ac44f1a2a..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/ulnds/socklnd/Makefile.am b/lnet/ulnds/socklnd/Makefile.am deleted file mode 100644 index f970be9990e54e1ac51aad46d4f5956656217c84..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -if LIBLUSTRE -if BUILD_USOCKLND -noinst_LIBRARIES = libsocklnd.a -endif -endif - -noinst_HEADERS = pqtimer.h dispatch.h table.h timer.h \ - connection.h bridge.h procbridge.h -libsocklnd_a_SOURCES = pqtimer.c select.c table.c pqtimer.h \ - dispatch.h table.h timer.h procapi.c proclib.c \ - connection.c tcplnd.c connection.h -libsocklnd_a_CPPFLAGS = $(LLCPPFLAGS) -libsocklnd_a_CFLAGS = $(LLCFLAGS) diff --git a/lnet/ulnds/socklnd/README b/lnet/ulnds/socklnd/README deleted file mode 100644 index 
6cb93d913485fe09ea87354514dafacb999f8c73..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/README +++ /dev/null @@ -1,53 +0,0 @@ -This library implements two NAL interfaces, both running over IP. -The first, tcpnal, creates TCP connections between participating -processes in order to transport the portals requests. The second, -ernal, provides a simple transport protocol which runs over -UDP datagrams. - -The interface functions return both of these values in host order for -convenience and readability. However this means that addresses -exchanged in messages between hosts of different orderings will not -function properly. - -Both NALs use the same support functions in order to schedule events -and communicate with the generic portals implementation. - - ------------------------- - | api | - |_______________________| - | lib | - |_______________________| - | ernal | |tcpnal | - |--------| |----------| - | udpsock| |connection| - |-----------------------| - | timer/select | - ------------------------- - - - These NALs uses the framework from fdnal of a pipe between the api -and library sides. This is wrapped up in the select on the library -side, and blocks on the api side. Performance could be severely -enhanced by collapsing this aritificial barrier, by using shared -memory queues, or by wiring the api layer directly to the library. - - -nid is defined as the low order 24-bits of the IP address of the -physical node left shifted by 8 plus a virtual node number of 0 -through 255 (really only 239). The virtual node number of a tcpnal -application should be specified using the environment variable -PTL_VIRTNODE. pid is now a completely arbitrary number in the -range of 0 to 255. The IP interface used can be overridden by -specifying the appropriate hostid by setting the PTL_HOSTID -environment variable. The value can be either dotted decimal -(n.n.n.n) or hex starting with "0x". 
-TCPNAL: - As the NAL needs to try to send to a particular nid/pid pair, it - will open up connections on demand. Because the port associated with - the connecting socket is different from the bound port, two - connections will normally be established between a pair of peers, with - data flowing from the anonymous connect (active) port to the advertised - or well-known bound (passive) port of each peer. - - Should the connection fail to open, an error is reported to the - library component, which causes the api request to fail. diff --git a/lnet/ulnds/socklnd/bridge.h b/lnet/ulnds/socklnd/bridge.h deleted file mode 100644 index a46cb138a654270e0771088806964ad5769888b2..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/bridge.h +++ /dev/null @@ -1,23 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef TCPNAL_PROCBRIDGE_H -#define TCPNAL_PROCBRIDGE_H - -#include <lnet/lib-lnet.h> - -typedef struct bridge { - int alive; - lnet_ni_t *b_ni; - void *lower; - void *local; - /* this doesn't really belong here */ - unsigned char iptop8; -} *bridge; - -#endif diff --git a/lnet/ulnds/socklnd/connection.c b/lnet/ulnds/socklnd/connection.c deleted file mode 100644 index 51aa5356f1a105b8e2d601d5c797f1186ef5bdd2..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/connection.c +++ /dev/null @@ -1,613 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* connection.c: - This file provides a simple stateful connection manager which - builds tcp connections on demand and leaves them open for - future use. -*/ - -#include <stdlib.h> -#include <pqtimer.h> -#include <dispatch.h> -#include <table.h> -#include <stdio.h> -#include <stdarg.h> -#include <string.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <netinet/tcp.h> -#include <lnet/types.h> -#include <lnet/lib-types.h> -#include <lnet/socklnd.h> -#include <libcfs/kp30.h> -#include <connection.h> -#include <pthread.h> -#include <errno.h> -#ifndef __CYGWIN__ -#include <syscall.h> -#endif - -/* tunables (via environment) */ -int tcpnal_acceptor_port = 988; -int tcpnal_buffer_size = 0; -int tcpnal_nagle = 0; - -int -tcpnal_env_param (char *name, int *val) -{ - char *env = getenv(name); - int n; - - if (env == NULL) - return 1; - - n = strlen(env); /* scanf may not assign on EOS */ - if (sscanf(env, "%i%n", val, &n) >= 1 && n == strlen(env)) { - CDEBUG(D_INFO, "Environment variable %s set to %d\n", - name, *val); - return 1; - } - - CERROR("Can't parse environment variable '%s=%s'\n", - name, env); - return 0; -} - -int -tcpnal_set_global_params (void) -{ - return tcpnal_env_param("TCPNAL_PORT", - &tcpnal_acceptor_port) && - tcpnal_env_param("TCPLND_PORT", - &tcpnal_acceptor_port) && - tcpnal_env_param("TCPNAL_BUFFER_SIZE", - &tcpnal_buffer_size) && - tcpnal_env_param("TCPLND_BUFFER_SIZE", - &tcpnal_buffer_size) && - tcpnal_env_param("TCPNAL_NAGLE", - &tcpnal_nagle) && 
- tcpnal_env_param("TCPLND_NAGLE", - &tcpnal_nagle); -} - -/* Function: compare_connection - * Arguments: connection c: a connection in the hash table - * lnet_process_id_t: an id to verify agains - * Returns: 1 if the connection is the one requested, 0 otherwise - * - * compare_connection() tests for collisions in the hash table - */ -static int compare_connection(void *arg1, void *arg2) -{ - connection c = arg1; - lnet_nid_t *nid = arg2; - - return (c->peer_nid == *nid); -} - -/* Function: connection_key - * Arguments: lnet_process_id_t id: an id to hash - * Returns: a not-particularily-well-distributed hash - * of the id - */ -static unsigned int connection_key(void *arg) -{ - lnet_nid_t *nid = arg; - - return (unsigned int)(*nid); -} - -void -close_connection(void *arg) -{ - connection c = arg; - - close(c->fd); - free(c); -} - -/* Function: remove_connection - * Arguments: c: the connection to remove - */ -void remove_connection(void *arg) -{ - connection c = arg; - - hash_table_remove(c->m->connections,&c->peer_nid); - close_connection(c); -} - - -/* Function: read_connection: - * Arguments: c: the connection to read from - * dest: the buffer to read into - * len: the number of bytes to read - * Returns: success as 1, or failure as 0 - * - * read_connection() reads data from the connection, continuing - * to read partial results until the request is satisfied or - * it errors. TODO: this read should be covered by signal protection. 
- */ -int read_connection(connection c, - unsigned char *dest, - int len) -{ - int offset = 0,rc; - - if (len) { - do { -#ifndef __CYGWIN__ - rc = syscall(SYS_read, c->fd, dest+offset, len-offset); -#else - rc = recv(c->fd, dest+offset, len-offset, 0); -#endif - if (rc <= 0) { - if (errno == EINTR) { - rc = 0; - } else { - remove_connection(c); - return (0); - } - } - offset += rc; - } while (offset < len); - } - return (1); -} - -static int connection_input(void *d) -{ - connection c = d; - return((*c->m->handler)(c->m->handler_arg,c)); -} - - -static connection -allocate_connection(manager m, - lnet_nid_t nid, - int fd) -{ - connection c=malloc(sizeof(struct connection)); - - c->m=m; - c->fd=fd; - c->peer_nid = nid; - - register_io_handler(fd,READ_HANDLER,connection_input,c); - hash_table_insert(m->connections,c,&nid); - return(c); -} - -int -tcpnal_write(lnet_nid_t nid, int sockfd, void *buffer, int nob) -{ - int rc = syscall(SYS_write, sockfd, buffer, nob); - - /* NB called on an 'empty' socket with huge buffering! 
*/ - if (rc == nob) - return 0; - - if (rc < 0) { - CERROR("Failed to send to %s: %s\n", - libcfs_nid2str(nid), strerror(errno)); - return -1; - } - - CERROR("Short send to %s: %d/%d\n", - libcfs_nid2str(nid), rc, nob); - return -1; -} - -int -tcpnal_read(lnet_nid_t nid, int sockfd, void *buffer, int nob) -{ - int rc; - - while (nob > 0) { - rc = syscall(SYS_read, sockfd, buffer, nob); - - if (rc == 0) { - CERROR("Unexpected EOF from %s\n", - libcfs_nid2str(nid)); - return -1; - } - - if (rc < 0) { - CERROR("Failed to receive from %s: %s\n", - libcfs_nid2str(nid), strerror(errno)); - return -1; - } - - nob -= rc; - } - return 0; -} - -int -tcpnal_hello (int sockfd, lnet_nid_t nid) -{ - struct timeval tv; - __u64 incarnation; - int rc; - int nob; - lnet_acceptor_connreq_t cr; - lnet_hdr_t hdr; - lnet_magicversion_t hmv; - - gettimeofday(&tv, NULL); - incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; - - memset(&cr, 0, sizeof(cr)); - cr.acr_magic = LNET_PROTO_ACCEPTOR_MAGIC; - cr.acr_version = LNET_PROTO_ACCEPTOR_VERSION; - cr.acr_nid = nid; - - /* hmv initialised and copied separately into hdr; compiler "optimize" - * likely due to confusion about pointer alias of hmv and hdr when this - * was done in-place. */ - hmv.magic = cpu_to_le32(LNET_PROTO_TCP_MAGIC); - hmv.version_major = cpu_to_le32(LNET_PROTO_TCP_VERSION_MAJOR); - hmv.version_minor = cpu_to_le32(LNET_PROTO_TCP_VERSION_MINOR); - - memset (&hdr, 0, sizeof (hdr)); - - CLASSERT (sizeof (hmv) == sizeof (hdr.dest_nid)); - memcpy(&hdr.dest_nid, &hmv, sizeof(hmv)); - - /* hdr.src_nid/src_pid are ignored at dest */ - - hdr.type = cpu_to_le32(LNET_MSG_HELLO); - hdr.msg.hello.type = cpu_to_le32(SOCKLND_CONN_ANY); - hdr.msg.hello.incarnation = cpu_to_le64(incarnation); - - /* I don't send any interface info */ - - /* Assume sufficient socket buffering for these messages... 
*/ - rc = tcpnal_write(nid, sockfd, &cr, sizeof(cr)); - if (rc != 0) - return -1; - - rc = tcpnal_write(nid, sockfd, &hdr, sizeof(hdr)); - if (rc != 0) - return -1; - - rc = tcpnal_read(nid, sockfd, &hmv, sizeof(hmv)); - if (rc != 0) - return -1; - - if (hmv.magic != le32_to_cpu(LNET_PROTO_TCP_MAGIC)) { - CERROR ("Bad magic %#08x (%#08x expected) from %s\n", - cpu_to_le32(hmv.magic), LNET_PROTO_TCP_MAGIC, - libcfs_nid2str(nid)); - return -1; - } - - if (hmv.version_major != cpu_to_le16 (LNET_PROTO_TCP_VERSION_MAJOR) || - hmv.version_minor != cpu_to_le16 (LNET_PROTO_TCP_VERSION_MINOR)) { - CERROR ("Incompatible protocol version %d.%d (%d.%d expected)" - " from %s\n", - le16_to_cpu (hmv.version_major), - le16_to_cpu (hmv.version_minor), - LNET_PROTO_TCP_VERSION_MAJOR, - LNET_PROTO_TCP_VERSION_MINOR, - libcfs_nid2str(nid)); - return -1; - } - -#if (LNET_PROTO_TCP_VERSION_MAJOR != 1) -# error "This code only understands protocol version 1.x" -#endif - /* version 1 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... 
*/ - - rc = tcpnal_read(nid, sockfd, ((char *)&hdr) + sizeof (hmv), - sizeof(hdr) - sizeof(hmv)); - if (rc != 0) - return -1; - - /* ...and check we got what we expected */ - if (hdr.type != cpu_to_le32 (LNET_MSG_HELLO)) { - CERROR ("Expecting a HELLO hdr " - " but got type %d with %d payload from %s\n", - le32_to_cpu (hdr.type), - le32_to_cpu (hdr.payload_length), libcfs_nid2str(nid)); - return -1; - } - - if (le64_to_cpu(hdr.src_nid) == LNET_NID_ANY) { - CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY\n"); - return -1; - } - - if (nid != le64_to_cpu (hdr.src_nid)) { - CERROR ("Connected to %s, but expecting %s\n", - libcfs_nid2str(le64_to_cpu (hdr.src_nid)), - libcfs_nid2str(nid)); - return -1; - } - - /* Ignore any interface info in the payload */ - nob = le32_to_cpu(hdr.payload_length); - if (nob != 0) { - CERROR("Unexpected HELLO payload %d from %s\n", - nob, libcfs_nid2str(nid)); - return -1; - } - - return 0; -} - -/* Function: force_tcp_connection - * Arguments: t: tcpnal - * dest: portals endpoint for the connection - * Returns: an allocated connection structure, either - * a pre-existing one, or a new connection - */ -connection force_tcp_connection(manager m, - lnet_nid_t nid, - procbridge pb) -{ - unsigned int ip = LNET_NIDADDR(nid); - connection conn; - struct sockaddr_in addr; - struct sockaddr_in locaddr; - int fd; - int option; - int rc; - int sz; - - pthread_mutex_lock(&m->conn_lock); - - conn = hash_table_find(m->connections, &nid); - if (conn) - goto out; - - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(tcpnal_acceptor_port); - - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_addr.s_addr = INADDR_ANY; - locaddr.sin_port = htons(m->port); - -#if 1 /* tcpnal connects from a non-privileged port */ - fd = socket(AF_INET, SOCK_STREAM, 0); - if (fd < 0) { - perror("tcpnal socket failed"); - goto out; - } - - option = 1; - rc = 
setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &option, sizeof(option)); - if (rc != 0) { - perror ("Can't set SO_REUSEADDR for socket"); - close(fd); - goto out; - } - - if (m->port != 0) { - /* Bind all subsequent connections to the same port */ - rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc != 0) { - perror("Error binding port"); - close(fd); - goto out; - } - } - - rc = connect(fd, (struct sockaddr *)&addr, - sizeof(struct sockaddr_in)); - if (rc != 0) { - perror("Error connecting to remote host"); - close(fd); - goto out; - } - - sz = sizeof(locaddr); - rc = getsockname(fd, (struct sockaddr *)&locaddr, &sz); - if (rc != 0) { - perror ("Error on getsockname"); - close(fd); - goto out; - } - - if (m->port == 0) - m->port = ntohs(locaddr.sin_port); - -#else - for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) { - fd = socket(AF_INET, SOCK_STREAM, 0); - if (fd < 0) { - perror("tcpnal socket failed"); - goto out; - } - - option = 1; - rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, - &option, sizeof(option)); - if (rc != 0) { - perror ("Can't set SO_REUSEADDR for socket"); - close(fd); - goto out; - } - - locaddr.sin_port = htons(rport); - rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc == 0 || errno == EACCES) { - rc = connect(fd, (struct sockaddr *)&addr, - sizeof(struct sockaddr_in)); - if (rc == 0) { - break; - } else if (errno != EADDRINUSE && errno != EADDRNOTAVAIL) { - perror("Error connecting to remote host"); - close(fd); - goto out; - } - } else if (errno != EADDRINUSE) { - perror("Error binding to privileged port"); - close(fd); - goto out; - } - close(fd); - } - - if (rport == IPPORT_RESERVED / 2) { - fprintf(stderr, "Out of ports trying to bind to a reserved port\n"); - goto out; - } -#endif - - option = tcpnal_nagle ? 
0 : 1; - setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); - option = tcpnal_buffer_size; - if (option != 0) { - setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); - option = tcpnal_buffer_size; - setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); - } - - /* say hello */ - if (tcpnal_hello(fd, nid)) - goto out; - - conn = allocate_connection(m, nid, fd); - - /* let nal thread know this event right away */ - if (conn) - procbridge_wakeup_nal(pb); - -out: - pthread_mutex_unlock(&m->conn_lock); - return (conn); -} - - -#if 0 /* we don't accept connections */ -/* Function: new_connection - * Arguments: t: opaque argument holding the tcpname - * Returns: 1 in order to reregister for new connection requests - * - * called when the bound service socket recieves - * a new connection request, it always accepts and - * installs a new connection - */ -static int new_connection(void *z) -{ - manager m=z; - struct sockaddr_in s; - int len=sizeof(struct sockaddr_in); - int fd=accept(m->bound,(struct sockaddr *)&s,&len); - unsigned int nid=*((unsigned int *)&s.sin_addr); - /* cfs specific hack */ - //unsigned short pid=s.sin_port; - pthread_mutex_lock(&m->conn_lock); - allocate_connection(m,htonl(nid),0/*pid*/,fd); - pthread_mutex_unlock(&m->conn_lock); - return(1); -} - -/* Function: bind_socket - * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to - * Returns: 1 on success, or 0 on error - * - * bind_socket() attempts to allocate and bind a socket to the requested - * port, or dynamically assign one from the kernel should the port be - * zero. Sets the bound and bound_handler elements of m. - * - * TODO: The port should be an explicitly sized type. 
- */ -static int bind_socket(manager m,unsigned short port) -{ - struct sockaddr_in addr; - int alen=sizeof(struct sockaddr_in); - - if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0) - return(0); - - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = 0; - addr.sin_port = htons(port); - - if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ - perror ("tcpnal bind"); - return(0); - } - - getsockname(m->bound,(struct sockaddr *)&addr, &alen); - - m->bound_handler=register_io_handler(m->bound,READ_HANDLER, - new_connection,m); - listen(m->bound,5); - m->port=addr.sin_port; - return(1); -} -#endif - - -/* Function: shutdown_connections - * Arguments: m: the manager structure - * - * close all connections and reclaim resources - */ -void shutdown_connections(manager m) -{ -#if 0 - /* we don't accept connections */ - close(m->bound); - remove_io_handler(m->bound_handler); -#endif - hash_destroy_table(m->connections,close_connection); - free(m); -} - - -/* Function: init_connections - * Arguments: t: the nal state for this interface - * Returns: a newly allocated manager structure, or - * zero if the fixed port could not be bound - */ -manager init_connections(int (*input)(void *, void *), void *a) -{ - manager m = (manager)malloc(sizeof(struct manager)); - - m->connections = hash_create_table(compare_connection,connection_key); - m->handler = input; - m->handler_arg = a; - m->port = 0; /* set on first connection */ - pthread_mutex_init(&m->conn_lock, 0); - - return m; -#if 0 - if (bind_socket(m,pid)) - return(m); - - free(m); - return(0); -#endif -} diff --git a/lnet/ulnds/socklnd/connection.h b/lnet/ulnds/socklnd/connection.h deleted file mode 100644 index 0c4718e91b41eea2806d9221fd9f3322189ffff9..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/connection.h +++ /dev/null @@ -1,35 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright 
(c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#include <table.h> -#include <procbridge.h> - -typedef struct manager { - table connections; - pthread_mutex_t conn_lock; /* protect connections table */ -#if 0 /* we don't accept connections */ - int bound; - io_handler bound_handler; -#endif - int (*handler)(void *, void *); - void *handler_arg; - int port; -} *manager; - - -typedef struct connection { - lnet_nid_t peer_nid; - int fd; - manager m; -} *connection; - -connection force_tcp_connection(manager m, lnet_nid_t nid, procbridge pb); -manager init_connections(int (*f)(void *, void *), void *); -void remove_connection(void *arg); -void shutdown_connections(manager m); -int read_connection(connection c, unsigned char *dest, int len); diff --git a/lnet/ulnds/socklnd/dispatch.h b/lnet/ulnds/socklnd/dispatch.h deleted file mode 100644 index 300f33bff965c8081d27c2540ababc4c9c91f8fb..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/dispatch.h +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -/* this file is only called dispatch.h to prevent it - from colliding with /usr/include/sys/select.h */ - -typedef struct io_handler *io_handler; - -struct io_handler{ - io_handler *last; - io_handler next; - int fd; - int type; - int (*function)(void *); - void *argument; - int disabled; -}; - - -#define READ_HANDLER 1 -#define WRITE_HANDLER 2 -#define EXCEPTION_HANDLER 4 -#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER) - -io_handler register_io_handler(int fd, - int type, - int (*function)(void *), - void *arg); - -void remove_io_handler (io_handler i); -void init_unix_timer(void); -void select_timer_block(when until); -when now(void); - -/* - * hacking for CFS internal MPI testing - */ -#define ENABLE_SELECT_DISPATCH diff --git a/lnet/ulnds/socklnd/pqtimer.c b/lnet/ulnds/socklnd/pqtimer.c deleted file mode 100644 index 98c48ebe3fa8d059090e8ba01a05d310444d0931..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/pqtimer.c +++ /dev/null @@ -1,226 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* timer.c: - * this file implements a simple priority-queue based timer system. when - * combined with a file which implements now() and block(), it can - * be used to provide course-grained time-based callbacks. - */ - -#include <pqtimer.h> -#include <stdlib.h> -#include <string.h> - -struct timer { - void (*function)(void *); - void *arg; - when w; - int interval; - int disable; -}; - -typedef struct thunk *thunk; -struct thunk { - void (*f)(void *); - void *a; - thunk next; -}; - -extern when now(void); - -static thunk thunks; -static int internal; -static void (*block_function)(when); -static int number_of_timers; -static int size_of_pqueue; -static timer *timers; - - -static void heal(int where) -{ - int left=(where<<1); - int right=(where<<1)+1; - int min=where; - timer temp; - - if (left <= number_of_timers) - if (timers[left]->w < timers[min]->w) min=left; - if (right <= number_of_timers) - if (timers[right]->w < timers[min]->w) min=right; - if (min != where){ - temp=timers[where]; - timers[where]=timers[min]; - timers[min]=temp; - heal(min); - } -} - -static void add_pqueue(int i) -{ - timer temp; - int parent=(i>>1); - if ((i>1) && (timers[i]->w< timers[parent]->w)){ - temp=timers[i]; - timers[i]=timers[parent]; - timers[parent]=temp; - add_pqueue(parent); - } -} - -static void add_timer(timer t) -{ - if (size_of_pqueue<(number_of_timers+2)){ - int oldsize=size_of_pqueue; - timer *new=(void *)malloc(sizeof(struct timer)*(size_of_pqueue+=10)); - memcpy(new,timers,sizeof(timer)*oldsize); - timers=new; - } - timers[++number_of_timers]=t; - add_pqueue(number_of_timers); -} - -/* Function: register_timer - * Arguments: interval: the time interval from the current time when - * the timer function should be called - * function: the function to call when the time 
has expired - * argument: the argument to call it with. - * Returns: a pointer to a timer structure - */ -timer register_timer(when interval, - void (*function)(void *), - void *argument) -{ - timer t=(timer)malloc(sizeof(struct timer)); - - t->arg=argument; - t->function=function; - t->interval=interval; - t->disable=0; - t->w=now()+interval; - add_timer(t); - if (!internal && (number_of_timers==1)) - block_function(t->w); - return(t); -} - -/* Function: remove_timer - * Arguments: t: - * Returns: nothing - * - * remove_timer removes a timer from the system, insuring - * that it will never be called. It does not actually - * free the timer due to reentrancy issues. - */ - -void remove_timer(timer t) -{ - t->disable=1; -} - - - -void timer_fire() -{ - timer current; - - current=timers[1]; - timers[1]=timers[number_of_timers--]; - heal(1); - if (!current->disable) { - (*current->function)(current->arg); - } - free(current); -} - -when next_timer(void) -{ - when here=now(); - - while (number_of_timers && (timers[1]->w <= here)) timer_fire(); - if (number_of_timers) return(timers[1]->w); - return(0); -} - -/* Function: timer_loop - * Arguments: none - * Returns: never - * - * timer_loop() is the blocking dispatch function for the timer. - * Is calls the block() function registered with init_timer, - * and handles associated with timers that have been registered. - */ -void timer_loop() -{ - when here; - - while (1){ - thunk z; - here=now(); - - for (z=thunks;z;z=z->next) (*z->f)(z->a); - - if (number_of_timers){ - if (timers[1]->w > here){ - (*block_function)(timers[1]->w); - } else { - timer_fire(); - } - } else { - thunk z; - for (z=thunks;z;z=z->next) (*z->f)(z->a); - (*block_function)(0); - } - } -} - - -/* Function: register_thunk - * Arguments: f: the function to call - * a: the single argument to call it with - * - * Thunk functions get called at irregular intervals, they - * should not assume when, or take a particularily long - * amount of time. 
Thunks are for background cleanup tasks. - */ -void register_thunk(void (*f)(void *),void *a) -{ - thunk t=(void *)malloc(sizeof(struct thunk)); - t->f=f; - t->a=a; - t->next=thunks; - thunks=t; -} - -/* Function: initialize_timer - * Arguments: block: the function to call to block for the specified interval - * - * initialize_timer() must be called before any other timer function, - * including timer_loop. - */ -void initialize_timer(void (*block)(when)) -{ - block_function=block; - number_of_timers=0; - size_of_pqueue=10; - timers=(timer *)malloc(sizeof(timer)*size_of_pqueue); - thunks=0; -} diff --git a/lnet/ulnds/socklnd/pqtimer.h b/lnet/ulnds/socklnd/pqtimer.h deleted file mode 100644 index 11efb0e22f8cda82cc85948f7c69c2330488daaa..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/pqtimer.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -typedef unsigned long long when; -when now(void); -typedef struct timer *timer; -timer register_timer(when interval, - void (*function)(void *), - void *argument); -timer register_timer_wait(void); -void remove_timer(timer); -void timer_loop(void); -void initialize_timer(void (*block)(when)); -void timer_fire(void); - - -#define HZ 0x100000000ull - - diff --git a/lnet/ulnds/socklnd/procapi.c b/lnet/ulnds/socklnd/procapi.c deleted file mode 100644 index 5fd5f46c9cb250ff75da91e17ddcc1b5fd7330e5..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/procapi.c +++ /dev/null @@ -1,198 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* api.c: - * This file provides the 'api' side for the process-based nals. - * it is responsible for creating the 'library' side thread, - * and passing wrapped portals transactions to it. - * - * Along with initialization, shutdown, and transport to the library - * side, this file contains some stubs to satisfy the nal definition. - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#ifndef __CYGWIN__ -# include <syscall.h> -#endif -#include <netdb.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <procbridge.h> -#include <pqtimer.h> -#include <dispatch.h> -#include <errno.h> -#ifdef HAVE_GETHOSTBYNAME -# include <sys/utsname.h> -#endif - -#if !HAVE_LIBPTHREAD -# error "This LND requires a multi-threaded runtime" -#endif - -/* XXX CFS workaround, to give a chance to let nal thread wake up - * from waiting in select - */ -static int procbridge_notifier_handler(void *arg) -{ - static char buf[8]; - procbridge p = (procbridge) arg; - - syscall(SYS_read, p->notifier[1], buf, sizeof(buf)); - return 1; -} - -void procbridge_wakeup_nal(procbridge p) -{ - static char buf[8]; - syscall(SYS_write, p->notifier[0], buf, sizeof(buf)); -} - -lnd_t the_tcplnd = { - .lnd_type = SOCKLND, - .lnd_startup = procbridge_startup, - .lnd_shutdown = procbridge_shutdown, - .lnd_send = tcpnal_send, - .lnd_recv = 
tcpnal_recv, - .lnd_notify = tcpnal_notify, -}; -int tcpnal_running; - -/* Function: shutdown - * Arguments: ni: the instance of me - * - * cleanup nal state, reclaim the lower side thread and - * its state using PTL_FINI codepoint - */ -void -procbridge_shutdown(lnet_ni_t *ni) -{ - bridge b=(bridge)ni->ni_data; - procbridge p=(procbridge)b->local; - - p->nal_flags |= NAL_FLAG_STOPPING; - procbridge_wakeup_nal(p); - - do { - pthread_mutex_lock(&p->mutex); - if (p->nal_flags & NAL_FLAG_STOPPED) { - pthread_mutex_unlock(&p->mutex); - break; - } - pthread_cond_wait(&p->cond, &p->mutex); - pthread_mutex_unlock(&p->mutex); - } while (1); - - free(p); - tcpnal_running = 0; -} - -#ifdef ENABLE_SELECT_DISPATCH -procbridge __global_procbridge = NULL; -#endif - -/* Function: procbridge_startup - * - * Arguments: ni: the instance of me - * interfaces: ignored - * - * Returns: portals rc - * - * initializes the tcp nal. we define unix_failure as an - * error wrapper to cut down clutter. - */ -int -procbridge_startup (lnet_ni_t *ni) -{ - procbridge p; - bridge b; - int rc; - - /* NB The local NID is not assigned. We only ever connect to the socknal, - * which assigns the src nid/pid on incoming non-privileged connections - * (i.e. us), and we don't accept connections. */ - - LASSERT (ni->ni_lnd == &the_tcplnd); - LASSERT (!tcpnal_running); /* only single instance supported */ - LASSERT (ni->ni_interfaces[0] == NULL); /* explicit interface(s) not supported */ - - /* The credit settings here are pretty irrelevent. Userspace tcplnd has no - * tx descriptor pool to exhaust and does a blocking send; that's the real - * limit on send concurrency. 
*/ - ni->ni_maxtxcredits = 1000; - ni->ni_peertxcredits = 1000; - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - b->local=p; - b->b_ni = ni; - ni->ni_data = b; - - /* init procbridge */ - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - p->nal_flags = 0; - - /* initialize notifier */ - if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) { - perror("socketpair failed"); - rc = -errno; - return rc; - } - - if (!register_io_handler(p->notifier[1], READ_HANDLER, - procbridge_notifier_handler, p)) { - perror("fail to register notifier handler"); - return -ENOMEM; - } - -#ifdef ENABLE_SELECT_DISPATCH - __global_procbridge = p; -#endif - - /* create nal thread */ - rc = pthread_create(&p->t, NULL, nal_thread, b); - if (rc != 0) { - perror("nal_init: pthread_create"); - return -ESRCH; - } - - do { - pthread_mutex_lock(&p->mutex); - if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) { - pthread_mutex_unlock(&p->mutex); - break; - } - pthread_cond_wait(&p->cond, &p->mutex); - pthread_mutex_unlock(&p->mutex); - } while (1); - - if (p->nal_flags & NAL_FLAG_STOPPED) - return -ENETDOWN; - - tcpnal_running = 1; - - return 0; -} diff --git a/lnet/ulnds/socklnd/procbridge.h b/lnet/ulnds/socklnd/procbridge.h deleted file mode 100644 index 2dd534bdbc1042c9397b0c3305cd6c1f5a0947c9..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/procbridge.h +++ /dev/null @@ -1,58 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2003 Cluster File Systems, Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef _PROCBRIDGE_H_ -#define _PROCBRIDGE_H_ - -#include <pthread.h> -#include <bridge.h> - - -#define NAL_FLAG_RUNNING 1 -#define NAL_FLAG_STOPPING 2 -#define NAL_FLAG_STOPPED 4 - -typedef struct procbridge { - /* sync between user threads and nal thread */ - pthread_t t; - pthread_cond_t cond; - pthread_mutex_t mutex; - - /* socket pair used to notify nal thread */ - int notifier[2]; - - int nal_flags; - -} *procbridge; - -typedef struct nal_init_args { - lnet_pid_t nia_requested_pid; - bridge nia_bridge; -} nal_init_args_t; - -extern void *nal_thread(void *); - -extern void procbridge_wakeup_nal(procbridge p); - -extern int procbridge_startup (lnet_ni_t *); -extern void procbridge_shutdown (lnet_ni_t *); - -extern void tcpnal_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive); - -extern int tcpnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int tcpnal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *cookie, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -extern int tcpnal_set_global_params(); - - - - -#endif diff --git a/lnet/ulnds/socklnd/proclib.c b/lnet/ulnds/socklnd/proclib.c deleted file mode 100644 index 01faf05c9c5972a61f4ab2a67f2f2caac0882a90..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/proclib.c +++ /dev/null @@ -1,108 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* lib.c: - * This file provides the 'library' side for the process-based nals. - * it is responsible for communication with the 'api' side and - * providing service to the generic portals 'library' - * implementation. 'library' might be better termed 'communication' - * or 'kernel'. - */ - -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> -#include <unistd.h> -#include <procbridge.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netdb.h> -#include <errno.h> -#include <timer.h> -#include <dispatch.h> - -/* the following functions are stubs to satisfy the nal definition - without doing anything particularily useful*/ -extern int tcpnal_init(bridge); -extern void tcpnal_shutdown(bridge); - -static void check_stopping(void *z) -{ - bridge b = z; - procbridge p = b->local; - - if ((p->nal_flags & NAL_FLAG_STOPPING) == 0) - return; - - tcpnal_shutdown(b); - - pthread_mutex_lock(&p->mutex); - p->nal_flags |= NAL_FLAG_STOPPED; - pthread_cond_broadcast(&p->cond); - pthread_mutex_unlock(&p->mutex); - - pthread_exit(0); -} - - -/* Function: nal_thread - * Arguments: z: an opaque reference to a nal control structure - * allocated and partially populated by the api level code - * Returns: nothing, and only on error or explicit shutdown - * - * This function is the entry point of the pthread initiated on - * the api side of the interface. This thread is used to handle - * asynchronous delivery to the application. 
- * - * We define a limit macro to place a ceiling on limits - * for syntactic convenience - */ - -void *nal_thread(void *z) -{ - bridge b = (bridge) z; - procbridge p=b->local; - int rc; - - rc = tcpnal_init(b); - - /* - * Whatever the initialization returned is passed back to the - * user level code for further interpretation. We just exit if - * it is non-zero since something went wrong. - */ - - pthread_mutex_lock(&p->mutex); - p->nal_flags |= (rc != 0) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING; - pthread_cond_broadcast(&p->cond); - pthread_mutex_unlock(&p->mutex); - - if (rc == 0) { - /* the thunk function is called each time the timer loop - performs an operation and returns to blocking mode. we - overload this function to inform the api side that - it may be interested in looking at the event queue */ - register_thunk(check_stopping,b); - timer_loop(); - } - return(0); -} diff --git a/lnet/ulnds/socklnd/select.c b/lnet/ulnds/socklnd/select.c deleted file mode 100644 index 42c9bc121bf2f1242c28d4635564a9734c224a21..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/select.c +++ /dev/null @@ -1,421 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* select.c: - * Provides a general mechanism for registering and dispatching - * io events through the select system call. - */ - -#define DEBUG_SUBSYSTEM S_LND - -#ifdef sun -#include <sys/filio.h> -#else -#include <sys/ioctl.h> -#endif - -#include <sys/time.h> -#include <sys/types.h> -#include <stdlib.h> -#include <syscall.h> -#include <pthread.h> -#include <errno.h> -#include <pqtimer.h> -#include <dispatch.h> -#include <procbridge.h> - - -static struct timeval beginning_of_epoch; -static io_handler io_handlers; - -/* Function: now - * - * Return: the current time in canonical units: a 64 bit number - * where the most significant 32 bits contains the number - * of seconds, and the least signficant a count of (1/(2^32))ths - * of a second. - */ -when now() -{ - struct timeval result; - - gettimeofday(&result,0); - return((((unsigned long long)result.tv_sec)<<32)| - (((unsigned long long)result.tv_usec)<<32)/1000000); -} - - -/* Function: register_io_handler - * Arguments: fd: the file descriptor of interest - * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER - * function: a function to call when io is available on fd - * arg: an opaque correlator to return to the handler - * Returns: a pointer to the io_handler structure - */ -io_handler register_io_handler(int fd, - int type, - int (*function)(void *), - void *arg) -{ - io_handler i=(io_handler)malloc(sizeof(struct io_handler)); - if ((i->fd=fd)>=0){ - i->type=type; - i->function=function; - i->argument=arg; - i->disabled=0; - i->last=&io_handlers; - if ((i->next=io_handlers)) i->next->last=&i->next; - io_handlers=i; - } - return(i); -} - -/* Function: remove_io_handler - * Arguments: i: a pointer to the handler to stop servicing - * - * remove_io_handler() doesn't actually free the handler, due - 
* to reentrancy problems. it just marks the handler for - * later cleanup by the blocking function. - */ -void remove_io_handler (io_handler i) -{ - i->disabled=1; -} - -static void set_flag(io_handler n,fd_set *r, fd_set *w, fd_set *e) -{ - if (n->type & READ_HANDLER) FD_SET(n->fd, r); - if (n->type & WRITE_HANDLER) FD_SET(n->fd, w); - if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, e); -} - -static int prepare_fd_sets(fd_set *r, fd_set *w, fd_set *e) -{ - io_handler j; - io_handler *k; - int max = 0; - - FD_ZERO(r); - FD_ZERO(w); - FD_ZERO(e); - for (k=&io_handlers;*k;){ - if ((*k)->disabled){ - j=*k; - *k=(*k)->next; - free(j); - } - if (*k) { - set_flag(*k,r,w,e); - if ((*k)->fd > max) - max = (*k)->fd; - k=&(*k)->next; - } - } - return max + 1; -} - -static int execute_callbacks(fd_set *r, fd_set *w, fd_set *e) -{ - io_handler j; - int n = 0, t; - - for (j = io_handlers; j; j = j->next) { - if (j->disabled) - continue; - - t = 0; - if (FD_ISSET(j->fd, r) && (j->type & READ_HANDLER)) { - FD_CLR(j->fd, r); - t++; - } - if (FD_ISSET(j->fd, w) && (j->type & WRITE_HANDLER)) { - FD_CLR(j->fd, w); - t++; - } - if (FD_ISSET(j->fd, e) && (j->type & EXCEPTION_HANDLER)) { - FD_CLR(j->fd, e); - t++; - } - if (t == 0) - continue; - - if (!(*j->function)(j->argument)) - j->disabled = 1; - - n += t; - } - - return n; -} - -#ifdef ENABLE_SELECT_DISPATCH - -static struct { - pthread_mutex_t mutex; - pthread_cond_t cond; - int submitted; - int nready; - int maxfd; - fd_set *rset; - fd_set *wset; - fd_set *eset; - struct timeval *timeout; - struct timeval submit_time; -} fd_extra = { - PTHREAD_MUTEX_INITIALIZER, - PTHREAD_COND_INITIALIZER, - 0, 0, 0, - NULL, NULL, NULL, NULL, -}; - -extern int liblustre_wait_event(int timeout); -extern procbridge __global_procbridge; - -/* - * this will intercept syscall select() of user apps - * such as MPI libs. 
- */ -int select(int n, fd_set *rset, fd_set *wset, fd_set *eset, - struct timeval *timeout) -{ - LASSERT(fd_extra.submitted == 0); - - fd_extra.nready = 0; - fd_extra.maxfd = n; - fd_extra.rset = rset; - fd_extra.wset = wset; - fd_extra.eset = eset; - fd_extra.timeout = timeout; - - liblustre_wait_event(0); - pthread_mutex_lock(&fd_extra.mutex); - gettimeofday(&fd_extra.submit_time, NULL); - fd_extra.submitted = 1; - LASSERT(__global_procbridge); - procbridge_wakeup_nal(__global_procbridge); - -again: - if (fd_extra.submitted) - pthread_cond_wait(&fd_extra.cond, &fd_extra.mutex); - pthread_mutex_unlock(&fd_extra.mutex); - - liblustre_wait_event(0); - - pthread_mutex_lock(&fd_extra.mutex); - if (fd_extra.submitted) - goto again; - pthread_mutex_unlock(&fd_extra.mutex); - - LASSERT(fd_extra.nready >= 0); - LASSERT(fd_extra.submitted == 0); - return fd_extra.nready; -} - -static int merge_fds(int max, fd_set *rset, fd_set *wset, fd_set *eset) -{ - int i; - - LASSERT(rset); - LASSERT(wset); - LASSERT(eset); - - for (i = 0; i < __FD_SETSIZE/__NFDBITS; i++) { - LASSERT(!fd_extra.rset || - !(__FDS_BITS(rset)[i] & __FDS_BITS(fd_extra.rset)[i])); - LASSERT(!fd_extra.wset || - !(__FDS_BITS(wset)[i] & __FDS_BITS(fd_extra.wset)[i])); - LASSERT(!fd_extra.eset || - !(__FDS_BITS(eset)[i] & __FDS_BITS(fd_extra.eset)[i])); - - if (fd_extra.rset && __FDS_BITS(fd_extra.rset)[i]) - __FDS_BITS(rset)[i] |= __FDS_BITS(fd_extra.rset)[i]; - if (fd_extra.wset && __FDS_BITS(fd_extra.wset)[i]) - __FDS_BITS(wset)[i] |= __FDS_BITS(fd_extra.wset)[i]; - if (fd_extra.eset && __FDS_BITS(fd_extra.eset)[i]) - __FDS_BITS(eset)[i] |= __FDS_BITS(fd_extra.eset)[i]; - } - - return (fd_extra.maxfd > max ? 
fd_extra.maxfd : max); -} - -static inline -int timeval_ge(struct timeval *tv1, struct timeval *tv2) -{ - LASSERT(tv1 && tv2); - return ((tv1->tv_sec - tv2->tv_sec) * 1000000 + - (tv1->tv_usec - tv2->tv_usec) >= 0); -} - -/* - * choose the most recent timeout value - */ -static struct timeval *choose_timeout(struct timeval *tv1, - struct timeval *tv2) -{ - if (!tv1) - return tv2; - else if (!tv2) - return tv1; - - if (timeval_ge(tv1, tv2)) - return tv2; - else - return tv1; -} - -/* Function: select_timer_block - * Arguments: until: an absolute time when the select should return - * - * This function dispatches the various file descriptors' handler - * functions, if the kernel indicates there is io available. - */ -void select_timer_block(when until) -{ - fd_set fds[3]; - struct timeval timeout; - struct timeval *timeout_pointer, *select_timeout; - int max, nready, nexec; - int fd_handling; - -again: - if (until) { - when interval; - - interval = until - now(); - timeout.tv_sec = (interval >> 32); - timeout.tv_usec = ((interval << 32) / 1000000) >> 32; - timeout_pointer = &timeout; - } else - timeout_pointer = NULL; - - fd_handling = 0; - max = prepare_fd_sets(&fds[0], &fds[1], &fds[2]); - select_timeout = timeout_pointer; - - pthread_mutex_lock(&fd_extra.mutex); - fd_handling = fd_extra.submitted; - pthread_mutex_unlock(&fd_extra.mutex); - if (fd_handling) { - max = merge_fds(max, &fds[0], &fds[1], &fds[2]); - select_timeout = choose_timeout(timeout_pointer, fd_extra.timeout); - } - - /* XXX only compile for linux */ -#if (__WORDSIZE == 64) && !defined(__mips64__) - nready = syscall(SYS_select, max, &fds[0], &fds[1], &fds[2], - select_timeout); -#else - nready = syscall(SYS__newselect, max, &fds[0], &fds[1], &fds[2], - select_timeout); -#endif - if (nready < 0) { - CERROR("select return err %d, errno %d\n", nready, errno); - return; - } - - if (nready) { - nexec = execute_callbacks(&fds[0], &fds[1], &fds[2]); - nready -= nexec; - } else - nexec = 0; - - /* even 
both nready & nexec are 0, we still need try to wakeup - * upper thread since it may have timed out - */ - if (fd_handling) { - LASSERT(nready >= 0); - - pthread_mutex_lock(&fd_extra.mutex); - if (nready) { - if (fd_extra.rset) - *fd_extra.rset = fds[0]; - if (fd_extra.wset) - *fd_extra.wset = fds[1]; - if (fd_extra.eset) - *fd_extra.eset = fds[2]; - fd_extra.nready = nready; - fd_extra.submitted = 0; - } else { - struct timeval t; - - fd_extra.nready = 0; - if (fd_extra.timeout) { - gettimeofday(&t, NULL); - if (timeval_ge(&t, &fd_extra.submit_time)) - fd_extra.submitted = 0; - } - } - - pthread_cond_signal(&fd_extra.cond); - pthread_mutex_unlock(&fd_extra.mutex); - } - - /* haven't found portals event, go back to loop if time - * is not expired */ - if (!nexec) { - if (timeout_pointer == NULL || now() >= until) - goto again; - } -} - -#else /* !ENABLE_SELECT_DISPATCH */ - -/* Function: select_timer_block - * Arguments: until: an absolute time when the select should return - * - * This function dispatches the various file descriptors' handler - * functions, if the kernel indicates there is io available. 
- */ -void select_timer_block(when until) -{ - fd_set fds[3]; - struct timeval timeout; - struct timeval *timeout_pointer; - int max, nready; - -again: - if (until) { - when interval; - interval = until - now(); - timeout.tv_sec = (interval >> 32); - timeout.tv_usec = ((interval << 32) / 1000000) >> 32; - timeout_pointer = &timeout; - } else - timeout_pointer = NULL; - - max = prepare_fd_sets(&fds[0], &fds[1], &fds[2]); - - nready = select(max, &fds[0], &fds[1], &fds[2], timeout_pointer); - if (nready > 0) - execute_callbacks(&fds[0], &fds[1], &fds[2]); -} -#endif /* ENABLE_SELECT_DISPATCH */ - -/* Function: init_unix_timer() - * is called to initialize the library - */ -void init_unix_timer() -{ - io_handlers=0; - gettimeofday(&beginning_of_epoch, 0); - initialize_timer(select_timer_block); -} diff --git a/lnet/ulnds/socklnd/table.c b/lnet/ulnds/socklnd/table.c deleted file mode 100644 index eb390c42a36dbe5abbcb8979f4e8faa00cab220a..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/table.c +++ /dev/null @@ -1,264 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <table.h> -#include <stdlib.h> -#include <string.h> - - -/* table.c: - * a very simple hash table implementation with paramerterizable - * comparison and key generation functions. it does resize - * in order to accomidate more entries, but never collapses - * the table - */ - -static table_entry *table_lookup (table t,void *comparator, - unsigned int k, - int (*compare_function)(void *, void *), - int *success) -{ - unsigned int key=k%t->size; - table_entry *i; - - for (i=&(t->entries[key]);*i;i=&((*i)->next)){ - if (compare_function && ((*i)->key==k)) - if ((*t->compare_function)((*i)->value,comparator)){ - *success=1; - return(i); - } - } - *success=0; - return(&(t->entries[key])); -} - - -static void resize_table(table t, int size) -{ - int old_size=t->size; - table_entry *old_entries=t->entries; - int i; - table_entry j,n; - table_entry *position; - int success; - - t->size=size; - t->entries=(table_entry *)malloc(sizeof(table_entry)*t->size); - memset(t->entries,0,sizeof(table_entry)*t->size); - - for (i=0;i<old_size;i++) - for (j=old_entries[i];j;j=n){ - n=j->next; - position=table_lookup(t,0,j->key,0,&success); - j->next= *position; - *position=j; - } - free(old_entries); -} - - -/* Function: key_from_int - * Arguments: int i: value to compute the key of - * Returns: the key - */ -unsigned int key_from_int(int i) -{ - return(i); -} - - -/* Function: key_from_string - * Arguments: char *s: the null terminated string - * to compute the key of - * Returns: the key - */ -unsigned int key_from_string(char *s) -{ - unsigned int result=0; - unsigned char *n; - int i; - if (!s) return(1); - for (n=s,i=0;*n;n++,i++) result^=(*n*57)^*n*i; - return(result); -} - - -/* Function: hash_create_table - * Arguments: compare_function: a function to compare - * a table instance with a correlator - * key_function: a function to generate a 32 bit - * hash key from a correlator - * Returns: a pointer to the new table - */ -table hash_create_table (int 
(*compare_function)(void *, void *), - unsigned int (*key_function)(void *)) -{ - table new=(table)malloc(sizeof(struct table)); - memset(new, 0, sizeof(struct table)); - - new->compare_function=compare_function; - new->key_function=key_function; - new->number_of_entries=0; - new->size=4; - new->entries=(table_entry *)malloc(sizeof(table_entry)*new->size); - memset(new->entries,0,sizeof(table_entry)*new->size); - return(new); -} - - -/* Function: hash_table_find - * Arguments: t: a table to look in - * comparator: a value to access the table entry - * Returns: the element references to by comparator, or null - */ -void *hash_table_find (table t, void *comparator) -{ - int success; - table_entry* entry=table_lookup(t,comparator, - (*t->key_function)(comparator), - t->compare_function, - &success); - if (success) return((*entry)->value); - return(0); -} - - -/* Function: hash_table_insert - * Arguments: t: a table to insert the object - * value: the object to put in the table - * comparator: the value by which the object - * will be addressed - * Returns: nothing - */ -void hash_table_insert (table t, void *value, void *comparator) -{ - int success; - unsigned int k=(*t->key_function)(comparator); - table_entry *position=table_lookup(t,comparator,k, - t->compare_function,&success); - table_entry entry; - - if (success) { - entry = *position; - } else { - entry = (table_entry)malloc(sizeof(struct table_entry)); - memset(entry, 0, sizeof(struct table_entry)); - entry->next= *position; - *position=entry; - t->number_of_entries++; - } - entry->value=value; - entry->key=k; - if (t->number_of_entries > t->size) resize_table(t,t->size*2); -} - -/* Function: hash_table_remove - * Arguments: t: the table to remove the object from - * comparator: the index value of the object to remove - * Returns: - */ -void hash_table_remove (table t, void *comparator) -{ - int success; - table_entry temp; - table_entry *position=table_lookup(t,comparator, - (*t->key_function)(comparator), - 
t->compare_function,&success); - if(success) { - temp=*position; - *position=(*position)->next; - free(temp); /* the value? */ - t->number_of_entries--; - } -} - -/* Function: hash_iterate_table_entries - * Arguments: t: the table to iterate over - * handler: a function to call with each element - * of the table, along with arg - * arg: the opaque object to pass to handler - * Returns: nothing - */ -void hash_iterate_table_entries(table t, - void (*handler)(void *,void *), - void *arg) -{ - int i; - table_entry *j,*next; - - for (i=0;i<t->size;i++) - for (j=t->entries+i;*j;j=next){ - next=&((*j)->next); - (*handler)(arg,(*j)->value); - } -} - -/* Function: hash_filter_table_entries - * Arguments: t: the table to iterate over - * handler: a function to call with each element - * of the table, along with arg - * arg: the opaque object to pass to handler - * Returns: nothing - * Notes: operations on the table inside handler are not safe - * - * filter_table_entires() calls the handler function for each - * item in the table, passing it and arg. The handler function - * returns 1 if it is to be retained in the table, and 0 - * if it is to be removed. 
- */ -void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg) -{ - int i; - table_entry *j,*next,v; - - for (i=0;i<t->size;i++) - for (j=t->entries+i;*j;j=next){ - next=&((*j)->next); - if (!(*handler)(arg,(*j)->value)){ - next=j; - v=*j; - *j=(*j)->next; - free(v); - t->number_of_entries--; - } - } -} - -/* Function: destroy_table - * Arguments: t: the table to free - * thunk: a function to call with each element, - * most likely free() - * Returns: nothing - */ -void hash_destroy_table(table t,void (*thunk)(void *)) -{ - table_entry j,next; - int i; - for (i=0;i<t->size;i++) - for (j=t->entries[i];j;j=next){ - next=j->next; - if (thunk) (*thunk)(j->value); - free(j); - } - free(t->entries); - free(t); -} diff --git a/lnet/ulnds/socklnd/table.h b/lnet/ulnds/socklnd/table.h deleted file mode 100644 index 0cb96697d073a15cccff172b107bbbae0085077e..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/table.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef E_TABLE -#define E_TABLE - -typedef struct table_entry { - unsigned int key; - void *value; - struct table_entry *next; -} *table_entry; - - -typedef struct table { - unsigned int size; - int number_of_entries; - table_entry *entries; - int (*compare_function)(void *, void *); - unsigned int (*key_function)(void *); -} *table; - -/* table.c */ -unsigned int key_from_int(int i); -unsigned int key_from_string(char *s); -table hash_create_table(int (*compare_function)(void *, void *), - unsigned int (*key_function)(void *)); -void *hash_table_find(table t, void *comparator); -void hash_table_insert(table t, void *value, void *comparator); -void hash_table_remove(table t, void *comparator); -void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg); -void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg); -void hash_destroy_table(table t, void (*thunk)(void *)); - -#endif diff --git a/lnet/ulnds/socklnd/tcplnd.c b/lnet/ulnds/socklnd/tcplnd.c deleted file mode 100644 index bd73fb2d40b09c5feb5cd6dec19cfb03d0e1034d..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/tcplnd.c +++ /dev/null @@ -1,249 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* tcpnal.c: - This file implements the TCP-based nal by providing glue - between the connection service and the generic NAL implementation */ - -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <pqtimer.h> -#include <dispatch.h> -#include <procbridge.h> -#include <connection.h> -#include <errno.h> - -#ifndef __CYGWIN__ -#include <syscall.h> -#endif - -void -tcpnal_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive) -{ - bridge b = (bridge)ni->ni_data; - connection c; - - if (!alive) { - LBUG(); - } - - c = force_tcp_connection((manager)b->lower, nid, b->local); - if (c == NULL) - CERROR("Can't create connection to %s\n", - libcfs_nid2str(nid)); -} - -/* - * sends a packet to the peer, after insuring that a connection exists - */ -int tcpnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - lnet_process_id_t target = lntmsg->msg_target; - unsigned int niov = lntmsg->msg_niov; - struct iovec *iov = lntmsg->msg_iov; - unsigned int offset = lntmsg->msg_offset; - unsigned int len = lntmsg->msg_len; - - connection c; - bridge b = (bridge)ni->ni_data; - struct iovec tiov[257]; - static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER; - int rc = 0; - int sysrc; - int total; - int ntiov; - int i; - - if (!(c = force_tcp_connection((manager)b->lower, target.nid, - b->local))) - return(-EIO); - - /* TODO: these results should be checked. furthermore, provision - must be made for the SIGPIPE which is delivered when - writing on a tcp socket which has closed underneath - the application. 
there is a linux flag in the sendmsg - call which turns off the signally behaviour, but its - nonstandard */ - - LASSERT (niov <= 256); - LASSERT (len == 0 || iov != NULL); /* I don't understand kiovs */ - - tiov[0].iov_base = hdr; - tiov[0].iov_len = sizeof(lnet_hdr_t); - ntiov = 1 + lnet_extract_iov(256, &tiov[1], niov, iov, offset, len); - - pthread_mutex_lock(&send_lock); -#if 1 - for (i = total = 0; i < ntiov; i++) - total += tiov[i].iov_len; - - sysrc = syscall(SYS_writev, c->fd, tiov, ntiov); - if (sysrc != total) { - fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n", - rc, total, errno); - rc = -errno; - } -#else - for (i = total = 0; i <= ntiov; i++) { - rc = send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0); - - if (rc != tiov[i].iov_len) { - fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n", - rc, tiov[i].iov_len, errno); - rc = -errno; - break; - } - total += rc; - } -#endif -#if 0 - fprintf (stderr, "sent %s total %d in %d frags\n", - hdr->type == LNET_MSG_ACK ? "ACK" : - hdr->type == LNET_MSG_PUT ? "PUT" : - hdr->type == LNET_MSG_GET ? "GET" : - hdr->type == LNET_MSG_REPLY ? "REPLY" : - hdr->type == LNET_MSG_HELLO ? "HELLO" : "UNKNOWN", - total, niov + 1); -#endif - pthread_mutex_unlock(&send_lock); - - if (rc == 0) { - /* NB the NAL only calls lnet_finalize() if it returns 0 - * from cb_send() */ - lnet_finalize(ni, lntmsg, 0); - } - - return(rc); -} - - -int tcpnal_recv(lnet_ni_t *ni, - void *private, - lnet_msg_t *cookie, - int delayed, - unsigned int niov, - struct iovec *iov, - lnet_kiov_t *kiov, - unsigned int offset, - unsigned int mlen, - unsigned int rlen) -{ - struct iovec tiov[256]; - int ntiov; - int i; - - if (mlen == 0) - goto finalize; - - LASSERT(iov != NULL); /* I don't understand kiovs */ - - ntiov = lnet_extract_iov(256, tiov, niov, iov, offset, mlen); - - /* FIXME - * 1. Is this effecient enough? change to use readv() directly? - * 2. 
need check return from read_connection() - * - MeiJia - */ - for (i = 0; i < ntiov; i++) - read_connection(private, tiov[i].iov_base, tiov[i].iov_len); - -finalize: - /* FIXME; we always assume success here... */ - lnet_finalize(ni, cookie, 0); - - LASSERT(rlen >= mlen); - - if (mlen != rlen){ - char *trash=malloc(rlen - mlen); - - /*TODO: check error status*/ - read_connection(private, trash, rlen - mlen); - free(trash); - } - - return(0); -} - - -/* Function: from_connection: - * Arguments: c: the connection to read from - * Returns: whether or not to continue reading from this connection, - * expressed as a 1 to continue, and a 0 to not - * - * from_connection() is called from the select loop when i/o is - * available. It attempts to read the portals header and - * pass it to the generic library for processing. - */ -static int from_connection(void *a, void *d) -{ - connection c = d; - bridge b = a; - lnet_hdr_t hdr; - int rc; - - if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))) { - /* replace dest_nid,pid (socknal sets its own) */ - hdr.dest_nid = cpu_to_le64(b->b_ni->ni_nid); - hdr.dest_pid = cpu_to_le32(the_lnet.ln_pid); - - rc = lnet_parse(b->b_ni, &hdr, c->peer_nid, c, 0); - if (rc < 0) { - CERROR("Error %d from lnet_parse\n", rc); - return 0; - } - - return(1); - } - return(0); -} - - -void tcpnal_shutdown(bridge b) -{ - shutdown_connections(b->lower); -} - -/* Function: PTL_IFACE_TCP - * Arguments: pid_request: desired port number to bind to - * desired: passed NAL limits structure - * actual: returned NAL limits structure - * Returns: a nal structure on success, or null on failure - */ -int tcpnal_init(bridge b) -{ - manager m; - - tcpnal_set_global_params(); - - if (!(m = init_connections(from_connection, b))) { - /* TODO: this needs to shut down the newly created junk */ - return(-ENXIO); - } - b->lower = m; - return(0); -} diff --git a/lnet/ulnds/socklnd/timer.h b/lnet/ulnds/socklnd/timer.h deleted file mode 100644 index 
aaf39d2d1f6d4fc879589a19a0354e23ac215350..0000000000000000000000000000000000000000 --- a/lnet/ulnds/socklnd/timer.h +++ /dev/null @@ -1,30 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -/* TODO: make this an explicit type when they become available */ -typedef unsigned long long when; - -typedef struct timer { - void (*function)(void *); - void *arg; - when w; - int interval; - int disable; -} *timer; - -timer register_timer(when, void (*f)(void *), void *a); -void remove_timer(timer t); -void timer_loop(void); -void initialize_timer(void); -void register_thunk(void (*f)(void *),void *a); - - -#define HZ 0x100000000ull - - diff --git a/lnet/utils/.cvsignore b/lnet/utils/.cvsignore deleted file mode 100644 index 13c2683e33b2c851e1416c12b4008af9ed9ef069..0000000000000000000000000000000000000000 --- a/lnet/utils/.cvsignore +++ /dev/null @@ -1,10 +0,0 @@ -Makefile -Makefile.in -acceptor -debugctl -ptlctl -.deps -routerstat -wirecheck -gmlndnid -.*.cmd diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am deleted file mode 100644 index 9cd3f2546e9780754344b54fd2391ea938b1f71f..0000000000000000000000000000000000000000 --- a/lnet/utils/Makefile.am +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -## $(srcdir)/../ for <portals/*.h>, ../../ for generated <config.h> -#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include -#LINK = $(CC) -o $@ - -if LIBLUSTRE -noinst_LIBRARIES = libuptlctl.a -endif - -libuptlctl_a_SOURCES = portals.c nidstrings.c debug.c l_ioctl.c -libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS) -libuptlctl_a_CFLAGS = $(LLCFLAGS) -DLUSTRE_UTILS=1 - -sbin_PROGRAMS = debugctl - -lib_LIBRARIES = libptlctl.a - -libptlctl_a_SOURCES = portals.c nidstrings.c debug.c l_ioctl.c parser.c parser.h - -if UTILS -sbin_PROGRAMS += ptlctl routerstat wirecheck -if BUILD_GMLND -sbin_PROGRAMS += gmlndnid -endif -endif - -wirecheck_SOURCES = wirecheck.c - -gmlndnid_SOURCES = gmlndnid.c -gmlndnid_CFLAGS = $(GMCPPFLAGS) -gmlndnid_LDFLAGS = -static -gmlndnid_LDADD = $(GMLIBS) -lgm - -ptlctl_SOURCES = ptlctl.c -ptlctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) -ptlctl_DEPENDENCIES = libptlctl.a - -routerstat_SOURCES = routerstat.c - -debugctl_SOURCES = debugctl.c -debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE) -debugctl_DEPENDENCIES = libptlctl.a - -nidstrings.c: @top_srcdir@/lnet/libcfs/nidstrings.c - ln -sf $< $@ diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c deleted file mode 100644 index 4affbbba1e3df111dfd8c94e4f8d327f04608d75..0000000000000000000000000000000000000000 --- a/lnet/utils/debug.c +++ /dev/null @@ -1,926 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre Networking, http://www.lustre.org. - * - * LNET is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * LNET is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with LNET; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Some day I'll split all of this functionality into a cfs_debug module - * of its own. That day is not today. - * - */ - -#define __USE_FILE_OFFSET64 -#define _GNU_SOURCE - -#include <stdio.h> -#ifdef HAVE_NETDB_H -#include <netdb.h> -#endif -#include <stdlib.h> -#include <string.h> -#ifdef HAVE_SYS_IOCTL_H -#include <sys/ioctl.h> -#endif -#ifndef _IOWR -#include "ioctl.h" -#endif -#include <fcntl.h> -#include <errno.h> -#include <unistd.h> -#include <assert.h> - -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/ioctl.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <sys/utsname.h> - -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> -#include <libcfs/portals_utils.h> -#include "parser.h" - -#include <time.h> - -static char rawbuf[8192]; -static char *buf = rawbuf; -static int max = 8192; -/*static int g_pfd = -1;*/ -static int subsystem_mask = ~0; -static int debug_mask = ~0; - -#define MAX_MARK_SIZE 256 - -static const char *libcfs_debug_subsystems[] = - {"undefined", "mdc", "mds", "osc", - "ost", "class", "log", "llite", - "rpc", "mgmt", "lnet", "lnd", - "pinger", "filter", "", "echo", - "ldlm", "lov", "", "", - "", "", "", "lmv", - "", "sec", "gss", "", - "mgc", "mgs", "fid", "fld", NULL}; -static const char *libcfs_debug_masks[] = - {"trace", "inode", "super", "ext2", - "malloc", "cache", "info", "ioctl", - "neterror", "net", "warning", "buffs", - "other", "dentry", "nettrace", "page", - "dlmtrace", "error", "emerg", "ha", - "rpctrace", "vfstrace", "reada", "mmap", - "config", "console", 
"quota", "sec", NULL}; - -struct debug_daemon_cmd { - char *cmd; - unsigned int cmdv; -}; - -static const struct debug_daemon_cmd libcfs_debug_daemon_cmd[] = { - {"start", DEBUG_DAEMON_START}, - {"stop", DEBUG_DAEMON_STOP}, - {0, 0} -}; - -#ifdef __linux__ - -#define DAEMON_CTL_NAME "/proc/sys/lnet/daemon_file" -#define SUBSYS_DEBUG_CTL_NAME "/proc/sys/lnet/subsystem_debug" -#define DEBUG_CTL_NAME "/proc/sys/lnet/debug" -#define DUMP_KERNEL_CTL_NAME "/proc/sys/lnet/dump_kernel" - -static int -dbg_open_ctlhandle(const char *str) -{ - int fd; - fd = open(str, O_WRONLY); - if (fd < 0) { - fprintf(stderr, "open %s failed: %s\n", str, - strerror(errno)); - return -1; - } - return fd; -} - -static void -dbg_close_ctlhandle(int fd) -{ - close(fd); -} - -static int -dbg_write_cmd(int fd, char *str, int len) -{ - int rc = write(fd, str, len); - - return (rc == len ? 0 : 1); -} - -#elif defined(__DARWIN__) - -#define DAEMON_CTL_NAME "lnet.trace_daemon" -#define SUBSYS_DEBUG_CTL_NAME "lnet.subsystem_debug" -#define DEBUG_CTL_NAME "lnet.debug" -#define DUMP_KERNEL_CTL_NAME "lnet.trace_dumpkernel" - -static char sysctl_name[128]; -static int -dbg_open_ctlhandle(const char *str) -{ - - if (strlen(str)+1 > 128) { - fprintf(stderr, "sysctl name is too long: %s.\n", str); - return -1; - } - strcpy(sysctl_name, str); - - return 0; -} - -static void -dbg_close_ctlhandle(int fd) -{ - sysctl_name[0] = '\0'; - return; -} - -static int -dbg_write_cmd(int fd, char *str, int len) -{ - int rc; - - rc = sysctlbyname(sysctl_name, NULL, NULL, str, len+1); - if (rc != 0) { - fprintf(stderr, "sysctl %s with cmd (%s) error: %d\n", - sysctl_name, str, errno); - } - return (rc == 0 ? 0: 1); -} - -#else -#error - Unknown sysctl convention. 
-#endif - -static int do_debug_mask(char *name, int enable) -{ - int found = 0, i; - - for (i = 0; libcfs_debug_subsystems[i] != NULL; i++) { - if (strcasecmp(name, libcfs_debug_subsystems[i]) == 0 || - strcasecmp(name, "all_subs") == 0) { - printf("%s output from subsystem \"%s\"\n", - enable ? "Enabling" : "Disabling", - libcfs_debug_subsystems[i]); - if (enable) - subsystem_mask |= (1 << i); - else - subsystem_mask &= ~(1 << i); - found = 1; - } - } - for (i = 0; libcfs_debug_masks[i] != NULL; i++) { - if (strcasecmp(name, libcfs_debug_masks[i]) == 0 || - strcasecmp(name, "all_types") == 0) { - printf("%s output of type \"%s\"\n", - enable ? "Enabling" : "Disabling", - libcfs_debug_masks[i]); - if (enable) - debug_mask |= (1 << i); - else - debug_mask &= ~(1 << i); - found = 1; - } - } - - return found; -} - -int dbg_initialize(int argc, char **argv) -{ - return 0; -} - -int jt_dbg_filter(int argc, char **argv) -{ - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n", - argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) - if (!do_debug_mask(argv[i], 0)) - fprintf(stderr, "Unknown subsystem or debug type: %s\n", - argv[i]); - return 0; -} - -int jt_dbg_show(int argc, char **argv) -{ - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n", - argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) - if (!do_debug_mask(argv[i], 1)) - fprintf(stderr, "Unknown subsystem or debug type: %s\n", - argv[i]); - - return 0; -} - -static int applymask(char* procpath, int value) -{ - int rc; - char buf[64]; - int len = snprintf(buf, 64, "%d", value); - - int fd = dbg_open_ctlhandle(procpath); - if (fd == -1) { - fprintf(stderr, "Unable to open %s: %s\n", - procpath, strerror(errno)); - return fd; - } - rc = dbg_write_cmd(fd, buf, len+1); - if (rc != 0) { - fprintf(stderr, "Write to %s failed: %s\n", - procpath, strerror(errno)); - return rc; - } - dbg_close_ctlhandle(fd); - return 0; -} - -static void 
applymask_all(unsigned int subs_mask, unsigned int debug_mask) -{ - if (!dump_filename) { - applymask(SUBSYS_DEBUG_CTL_NAME, subs_mask); - applymask(DEBUG_CTL_NAME, debug_mask); - } else { - struct libcfs_debug_ioctl_data data; - - data.hdr.ioc_len = sizeof(data); - data.hdr.ioc_version = 0; - data.subs = subs_mask; - data.debug = debug_mask; - - dump(OBD_DEV_ID, LIBCFS_IOC_DEBUG_MASK, &data); - } - printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/lnet\n", - subs_mask, debug_mask); -} - -int jt_dbg_list(int argc, char **argv) -{ - int i; - - if (argc != 2) { - fprintf(stderr, "usage: %s <subs || types>\n", argv[0]); - return 0; - } - - if (strcasecmp(argv[1], "subs") == 0) { - printf("Subsystems: all_subs"); - for (i = 0; libcfs_debug_subsystems[i] != NULL; i++) - if (libcfs_debug_subsystems[i][0]) - printf(", %s", libcfs_debug_subsystems[i]); - printf("\n"); - } else if (strcasecmp(argv[1], "types") == 0) { - printf("Types: all_types"); - for (i = 0; libcfs_debug_masks[i] != NULL; i++) - printf(", %s", libcfs_debug_masks[i]); - printf("\n"); - } else if (strcasecmp(argv[1], "applymasks") == 0) { - applymask_all(subsystem_mask, debug_mask); - } - return 0; -} - -/* all strings nul-terminated; only the struct and hdr need to be freed */ -struct dbg_line { - struct ptldebug_header *hdr; - char *file; - char *fn; - char *text; -}; - -static int cmp_rec(const void *p1, const void *p2) -{ - struct dbg_line *d1 = *(struct dbg_line **)p1; - struct dbg_line *d2 = *(struct dbg_line **)p2; - - if (d1->hdr->ph_sec < d2->hdr->ph_sec) - return -1; - if (d1->hdr->ph_sec == d2->hdr->ph_sec && - d1->hdr->ph_usec < d2->hdr->ph_usec) - return -1; - if (d1->hdr->ph_sec == d2->hdr->ph_sec && - d1->hdr->ph_usec == d2->hdr->ph_usec) - return 0; - return 1; -} - -static void print_rec(struct dbg_line **linev, int used, FILE *out) -{ - int i; - - for (i = 0; i < used; i++) { - struct dbg_line *line = linev[i]; - struct ptldebug_header *hdr = line->hdr; - - fprintf(out, 
"%08x:%08x:%u:%u.%06llu:%u:%u:%u:(%s:%u:%s()) %s", - hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id, - hdr->ph_sec, (unsigned long long)hdr->ph_usec, - hdr->ph_stack, hdr->ph_pid, hdr->ph_extern_pid, - line->file, hdr->ph_line_num, line->fn, line->text); - free(line->hdr); - free(line); - } - free(linev); -} - -static int add_rec(struct dbg_line *line, struct dbg_line ***linevp, int *lenp, - int used) -{ - struct dbg_line **linev = *linevp; - - if (used == *lenp) { - int nlen = *lenp + 512; - int nsize = nlen * sizeof(struct dbg_line *); - - linev = *linevp ? realloc(*linevp, nsize) : malloc(nsize); - if (!linev) - return 0; - *linevp = linev; - *lenp = nlen; - } - linev[used] = line; - return 1; -} - -static int parse_buffer(FILE *in, FILE *out) -{ - struct dbg_line *line; - struct ptldebug_header *hdr; - char buf[4097], *p; - int rc; - unsigned long dropped = 0, kept = 0; - struct dbg_line **linev = NULL; - int linev_len = 0; - - while (1) { - rc = fread(buf, sizeof(hdr->ph_len) + sizeof(hdr->ph_flags), 1, in); - if (rc <= 0) - break; - - hdr = (void *)buf; - if (hdr->ph_len == 0) - break; - if (hdr->ph_len > 4094) { - fprintf(stderr, "unexpected large record: %d bytes. 
" - "aborting.\n", - hdr->ph_len); - break; - } - - rc = fread(buf + sizeof(hdr->ph_len) + sizeof(hdr->ph_flags), 1, - hdr->ph_len - sizeof(hdr->ph_len) - sizeof(hdr->ph_flags), in); - if (rc <= 0) - break; - - if (hdr->ph_mask && - (!(subsystem_mask & hdr->ph_subsys) || - (!(debug_mask & hdr->ph_mask)))) { - dropped++; - continue; - } - - line = malloc(sizeof(*line)); - if (line == NULL) { - fprintf(stderr, "malloc failed; printing accumulated " - "records and exiting.\n"); - break; - } - - line->hdr = malloc(hdr->ph_len + 1); - if (line->hdr == NULL) { - free(line); - fprintf(stderr, "malloc failed; printing accumulated " - "records and exiting.\n"); - break; - } - - p = (void *)line->hdr; - memcpy(line->hdr, buf, hdr->ph_len); - p[hdr->ph_len] = '\0'; - - p += sizeof(*hdr); - line->file = p; - p += strlen(line->file) + 1; - line->fn = p; - p += strlen(line->fn) + 1; - line->text = p; - - if (!add_rec(line, &linev, &linev_len, kept)) { - fprintf(stderr, "malloc failed; printing accumulated " - "records and exiting.\n"); - break; - } - kept++; - } - - if (linev) { - qsort(linev, kept, sizeof(struct dbg_line *), cmp_rec); - print_rec(linev, kept, out); - } - - printf("Debug log: %lu lines, %lu kept, %lu dropped.\n", - dropped + kept, kept, dropped); - return 0; -} - -int jt_dbg_debug_kernel(int argc, char **argv) -{ - char filename[4096]; - struct stat st; - int rc, raw = 0, fd; - FILE *in, *out = stdout; - - if (argc > 3) { - fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]); - return 0; - } - - if (argc > 2) { - raw = atoi(argv[2]); - } else if (argc > 1 && (argv[1][0] == '0' || argv[1][0] == '1')) { - raw = atoi(argv[1]); - argc--; - } - - /* If we are dumping raw (which means no conversion step to ASCII) - * then dump directly to any supplied filename, otherwise this is - * just a temp file and we dump to the real file at convert time. 
*/ - if (argc > 1 && raw) - strcpy(filename, argv[1]); - else - sprintf(filename, "/tmp/lustre-log.%lu.%u",time(NULL),getpid()); - - if (stat(filename, &st) == 0 && S_ISREG(st.st_mode)) - unlink(filename); - - fd = dbg_open_ctlhandle(DUMP_KERNEL_CTL_NAME); - if (fd < 0) { - fprintf(stderr, "open(dump_kernel) failed: %s\n", - strerror(errno)); - return 1; - } - - rc = dbg_write_cmd(fd, filename, strlen(filename)); - if (rc != 0) { - fprintf(stderr, "write(%s) failed: %s\n", filename, - strerror(errno)); - close(fd); - return 1; - } - dbg_close_ctlhandle(fd); - - if (raw) - return 0; - - in = fopen(filename, "r"); - if (in == NULL) { - if (errno == ENOENT) /* no dump file created */ - return 0; - - fprintf(stderr, "fopen(%s) failed: %s\n", filename, - strerror(errno)); - return 1; - } - if (argc > 1) { - out = fopen(argv[1], "w"); - if (out == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], - strerror(errno)); - fclose(in); - return 1; - } - } - - rc = parse_buffer(in, out); - fclose(in); - if (argc > 1) - fclose(out); - if (rc) { - fprintf(stderr, "parse_buffer failed; leaving tmp file %s " - "behind.\n", filename); - } else { - rc = unlink(filename); - if (rc) - fprintf(stderr, "dumped successfully, but couldn't " - "unlink tmp file %s: %s\n", filename, - strerror(errno)); - } - return rc; -} - -int jt_dbg_debug_file(int argc, char **argv) -{ - int fdin; - int fdout; - FILE *in; - FILE *out = stdout; - int rc; - - if (argc > 3 || argc < 2) { - fprintf(stderr, "usage: %s <input> [output]\n", argv[0]); - return 0; - } - - fdin = open(argv[1], O_RDONLY | O_LARGEFILE); - if (fdin == -1) { - fprintf(stderr, "open(%s) failed: %s\n", argv[1], - strerror(errno)); - return 1; - } - in = fdopen(fdin, "r"); - if (in == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], - strerror(errno)); - close(fdin); - return 1; - } - if (argc > 2) { - fdout = open(argv[2], - O_CREAT | O_TRUNC | O_WRONLY | O_LARGEFILE, - 0600); - if (fdout == -1) { - fprintf(stderr, 
"open(%s) failed: %s\n", argv[2], - strerror(errno)); - fclose(in); - return 1; - } - out = fdopen(fdout, "w"); - if (out == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[2], - strerror(errno)); - fclose(in); - close(fdout); - return 1; - } - } - - rc = parse_buffer(in, out); - - fclose(in); - if (out != stdout) - fclose(out); - - return rc; -} - -const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n"; - -int jt_dbg_debug_daemon(int argc, char **argv) -{ - int rc; - int fd; - - if (argc <= 1) { - fprintf(stderr, debug_daemon_usage, argv[0]); - return 1; - } - - fd = dbg_open_ctlhandle(DAEMON_CTL_NAME); - if (fd < 0) - return -1; - - rc = -1; - if (strcasecmp(argv[1], "start") == 0) { - if (argc < 3 || argc > 4 || - (argc == 4 && strlen(argv[3]) > 5)) { - fprintf(stderr, debug_daemon_usage, argv[0]); - goto out; - } - if (argc == 4) { - char buf[12]; - const long min_size = 10; - const long max_size = 20480; - long size; - char *end; - - size = strtoul(argv[3], &end, 0); - if (size < min_size || - size > max_size || - *end != 0) { - fprintf(stderr, "size %s invalid, must be in " - "the range %ld-%ld MB\n", argv[3], - min_size, max_size); - goto out; - } - snprintf(buf, sizeof(buf), "size=%ld", size); - rc = dbg_write_cmd(fd, buf, strlen(buf)); - - if (rc != 0) { - fprintf(stderr, "set %s failed: %s\n", - buf, strerror(errno)); - goto out; - } - } - - rc = dbg_write_cmd(fd, argv[2], strlen(argv[2])); - if (rc != 0) { - fprintf(stderr, "start debug_daemon on %s failed: %s\n", - argv[2], strerror(errno)); - goto out; - } - rc = 0; - goto out; - } - if (strcasecmp(argv[1], "stop") == 0) { - rc = dbg_write_cmd(fd, "stop", 4); - if (rc != 0) { - fprintf(stderr, "stopping debug_daemon failed: %s\n", - strerror(errno)); - goto out; - } - - rc = 0; - goto out; - } - - fprintf(stderr, debug_daemon_usage, argv[0]); - rc = -1; -out: - dbg_close_ctlhandle(fd); - return rc; -} - -int jt_dbg_clear_debug_buf(int argc, char **argv) -{ - int rc; - struct 
libcfs_ioctl_data data; - - if (argc != 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - memset(&data, 0, sizeof(data)); - if (libcfs_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "libcfs_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_CLEAR_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_LIBCFS_CLEAR_DEBUG failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -int jt_dbg_mark_debug_buf(int argc, char **argv) -{ - static char scratch[MAX_MARK_SIZE] = { '\0' }; - int rc, max_size = MAX_MARK_SIZE-1; - struct libcfs_ioctl_data data = { 0 }; - char *text; - time_t now = time(NULL); - - if (argc > 1) { - int count; - text = scratch; - strncpy(text, argv[1], max_size); - max_size-=strlen(argv[1]); - for (count = 2; (count < argc) && (max_size > 0); count++){ - strncat(text, " ", max_size); - max_size -= 1; - strncat(text, argv[count], max_size); - max_size -= strlen(argv[count]); - } - } else { - text = ctime(&now); - } - - data.ioc_inllen1 = strlen(text) + 1; - data.ioc_inlbuf1 = text; - if (libcfs_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "libcfs_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_MARK_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_LIBCFS_MARK_DEBUG failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -static struct mod_paths { - char *name, *path; -} mod_paths[] = { - {"libcfs", "lnet/libcfs"}, - {"lnet", "lnet/lnet"}, - {"kciblnd", "lnet/klnds/ciblnd"}, - {"kgmlnd", "lnet/klnds/gmlnd"}, - {"kmxlnd", "lnet/klnds/mxlnd"}, - {"kiiblnd", "lnet/klnds/iiblnd"}, - {"ko2iblnd", "lnet/klnds/o2iblnd"}, - {"kopeniblnd", "lnet/klnds/openiblnd"}, - {"kptllnd", "lnet/klnds/ptllnd"}, - {"kqswlnd", "lnet/klnds/qswlnd"}, - {"kralnd", "lnet/klnds/ralnd"}, - {"ksocklnd", "lnet/klnds/socklnd"}, - {"ktdilnd", "lnet/klnds/tdilnd"}, - {"kviblnd", "lnet/klnds/viblnd"}, - {"lvfs", "lustre/lvfs"}, - {"obdclass", "lustre/obdclass"}, - 
{"llog_test", "lustre/obdclass"}, - {"ptlrpc_gss", "lustre/ptlrpc/gss"}, - {"ptlrpc", "lustre/ptlrpc"}, - {"gks", "lustre/sec/gks"}, - {"gkc", "lustre/sec/gks"}, - {"ost", "lustre/ost"}, - {"osc", "lustre/osc"}, - {"mds", "lustre/mds"}, - {"mdc", "lustre/mdc"}, - {"llite", "lustre/llite"}, - {"lustre", "lustre/llite"}, - {"ldiskfs", "lustre/ldiskfs"}, - {"smfs", "lustre/smfs"}, - {"obdecho", "lustre/obdecho"}, - {"ldlm", "lustre/ldlm"}, - {"obdfilter", "lustre/obdfilter"}, - {"lov", "lustre/lov"}, - {"lmv", "lustre/lmv"}, - {"fsfilt_ext3", "lustre/lvfs"}, - {"fsfilt_reiserfs", "lustre/lvfs"}, - {"fsfilt_smfs", "lustre/lvfs"}, - {"fsfilt_ldiskfs", "lustre/lvfs"}, - {"mds_ext3", "lustre/mds"}, - {"cobd", "lustre/cobd"}, - {"cmobd", "lustre/cmobd"}, - {"lquota", "lustre/quota"}, - {"mgs", "lustre/mgs"}, - {"mgc", "lustre/mgc"}, - {"mdt", "lustre/mdt"}, - {"mdd", "lustre/mdd"}, - {"osd", "lustre/osd"}, - {"cmm", "lustre/cmm"}, - {"fid", "lustre/fid"}, - {"fld", "lustre/fld"}, - {NULL, NULL} -}; - -static int jt_dbg_modules_2_4(int argc, char **argv) -{ -#ifdef HAVE_LINUX_VERSION_H -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct mod_paths *mp; - char *path = ""; - char *kernel = "linux"; - - if (argc >= 2) - path = argv[1]; - if (argc == 3) - kernel = argv[2]; - if (argc > 3) { - printf("%s [path] [kernel]\n", argv[0]); - return 0; - } - - for (mp = mod_paths; mp->name != NULL; mp++) { - struct module_info info; - int rc; - size_t crap; - int query_module(const char *name, int which, void *buf, - size_t bufsize, size_t *ret); - - rc = query_module(mp->name, QM_INFO, &info, sizeof(info), - &crap); - if (rc < 0) { - if (errno != ENOENT) - printf("query_module(%s) failed: %s\n", - mp->name, strerror(errno)); - } else { - printf("add-symbol-file %s%s%s/%s.o 0x%0lx\n", path, - path[0] ? 
"/" : "", mp->path, mp->name, - info.addr + sizeof(struct module)); - } - } - - return 0; -#endif // Headers are 2.6-only -#endif // !HAVE_LINUX_VERSION_H - return -EINVAL; -} - -static int jt_dbg_modules_2_5(int argc, char **argv) -{ - struct mod_paths *mp; - char *path = ""; - char *kernel = "linux"; - const char *proc = "/proc/modules"; - char modname[128], others[4096]; - long modaddr; - int rc; - FILE *file; - - if (argc >= 2) - path = argv[1]; - if (argc == 3) - kernel = argv[2]; - if (argc > 3) { - printf("%s [path] [kernel]\n", argv[0]); - return 0; - } - - file = fopen(proc, "r"); - if (!file) { - printf("failed open %s: %s\n", proc, strerror(errno)); - return 0; - } - - while ((rc = fscanf(file, "%s %s %s %s %s %lx\n", - modname, others, others, others, others, &modaddr)) == 6) { - for (mp = mod_paths; mp->name != NULL; mp++) { - if (!strcmp(mp->name, modname)) - break; - } - if (mp->name) { - printf("add-symbol-file %s%s%s/%s.o 0x%0lx\n", path, - path[0] ? "/" : "", mp->path, mp->name, modaddr); - } - } - - fclose(file); - return 0; -} - -int jt_dbg_modules(int argc, char **argv) -{ - int rc = 0; - struct utsname sysinfo; - - rc = uname(&sysinfo); - if (rc) { - printf("uname() failed: %s\n", strerror(errno)); - return 0; - } - - if (sysinfo.release[2] > '4') { - return jt_dbg_modules_2_5(argc, argv); - } else { - return jt_dbg_modules_2_4(argc, argv); - } - - return 0; -} - -int jt_dbg_panic(int argc, char **argv) -{ - int rc; - struct libcfs_ioctl_data data; - - if (argc != 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - memset(&data, 0, sizeof(data)); - if (libcfs_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "libcfs_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PANIC, buf); - if (rc) { - fprintf(stderr, "IOC_LIBCFS_PANIC failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} diff --git a/lnet/utils/debugctl.c b/lnet/utils/debugctl.c deleted file mode 100644 index 
cf70fd8b115b9cb68135eb5cc38f9e1dee9fd170..0000000000000000000000000000000000000000 --- a/lnet/utils/debugctl.c +++ /dev/null @@ -1,67 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Some day I'll split all of this functionality into a cfs_debug module - * of its own. That day is not today. 
- * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> -#include "parser.h" - - -command_t list[] = { - {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"}, - {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file|stop], control debug daemon to dump debug buffer to a file"}, - {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file <input> [output] [raw], read debug buffer from input and print it [to output]"}, - {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"}, - {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"}, - {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"}, - {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"}, - {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"}, - {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: <path>)"}, - {"panic", jt_dbg_panic, 0, "cause the kernel to panic"}, - {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - {"help", Parser_help, 0, "help"}, - {"exit", Parser_quit, 0, "quit"}, - {"quit", Parser_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - -int main(int argc, char **argv) -{ - if (dbg_initialize(argc, argv) < 0) - exit(2); - - register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH, - LNET_DEV_MAJOR, LNET_DEV_MINOR); - - Parser_init("debugctl > ", list); - if (argc > 1) - return Parser_execarg(argc - 1, &argv[1], list); - - Parser_commands(); - - unregister_ioc_dev(LNET_DEV_ID); - return 0; -} diff --git a/lnet/utils/gmlndnid.c b/lnet/utils/gmlndnid.c deleted file mode 100644 index ce5cb14ede7ea989ae909b22b9d78cfe41b616d7..0000000000000000000000000000000000000000 --- a/lnet/utils/gmlndnid.c +++ /dev/null @@ -1,184 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * 
vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Los Alamos National Laboratory (LANL) - * - * This file is part of Lustre, http://www.lustre.org/ - * - * This file is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <stdio.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/tcp.h> -#include <netdb.h> -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <unistd.h> -#include <syslog.h> -#include <errno.h> - -#include <lnet/api-support.h> -#include <lnet/lib-types.h> - -#include <gm.h> - -/* - * portals always uses unit 0 - * Can this be configurable? 
- */ -#define GM_UNIT 0 - -void -usage(char *prg, int h) -{ - fprintf(stderr, - "usage %s -h\n" - " %s [-l] [-n hostname] [-L] [hostnames]\n", prg); - - if (h) - printf("Print Myrinet Global network ids for specified hosts\n" - "-l print local host's ID\n" - "-n hostname print given host's ID\n" - "-L print Myringet local net ID too\n" - "[hostnames] print ids of given hosts (local if none)\n"); -} - -gm_status_t -print_gmid(char *name, int name_fieldlen, int show_local_id) -{ - struct gm_port *gm_port; - int gm_port_id; - gm_status_t gm_status; - unsigned int local_id; - unsigned int global_id; - - gm_status = gm_init(); - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_init: %s\n", gm_strerror(gm_status)); - return gm_status; - } - - gm_port_id = 2; - gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id, "gmnalnid", - GM_API_VERSION); - if (gm_status != GM_SUCCESS) { - int num_ports = gm_num_ports(gm_port); - - /* Couldn't open port 2, try 4 ... num_ports */ - for (gm_port_id = 4; gm_port_id < num_ports; gm_port_id++) { - gm_status = gm_open(&gm_port, GM_UNIT, gm_port_id, - "gmnalnid", GM_API_VERSION); - if (gm_status == GM_SUCCESS) - break; - } - - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_open: %s\n",gm_strerror(gm_status)); - goto out_0; - } - } - - if (name == NULL) { - local_id = 1; - name = "<local>"; - } else { - gm_status = gm_host_name_to_node_id_ex(gm_port, 1000000, name, - &local_id); - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_host_name_to_node_id_ex(%s): %s\n", - name, gm_strerror(gm_status)); - goto out_1; - } - } - - gm_status = gm_node_id_to_global_id(gm_port, local_id, &global_id) ; - if (gm_status != GM_SUCCESS) { - fprintf(stderr, "gm_node_id_to_global_id(%s:%d): %s\n", - name, local_id, gm_strerror(gm_status)); - goto out_1; - } - - if (name_fieldlen > 0) - printf ("%*s ", name_fieldlen, name); - - if (!show_local_id) - printf("0x%x\n", global_id); - else - printf("local 0x%x global 0x%x\n", local_id, global_id); - - 
out_1: - gm_close(gm_port); - out_0: - gm_finalize(); - - return gm_status; -} - -int -main (int argc, char **argv) -{ - int c; - gm_status_t gmrc; - int rc; - int max_namelen = 0; - int show_local_id = 0; - - while ((c = getopt(argc, argv, "n:lLh")) != -1) - switch(c) { - case 'h': - usage(argv[0], 1); - return 0; - - case 'L': - show_local_id = 1; - break; - - case 'n': - gmrc = print_gmid(optarg, 0, show_local_id); - return (gmrc == GM_SUCCESS) ? 0 : 1; - - case 'l': - gmrc = print_gmid(NULL, 0, show_local_id); - return (gmrc == GM_SUCCESS) ? 0 : 1; - - default: - usage(argv[0], 0); - return 2; - } - - if (optind == argc) { - gmrc = print_gmid(NULL, 0, show_local_id); - return (gmrc == GM_SUCCESS) ? 0 : 1; - } - - if (optind != argc - 1) - for (c = optind; c < argc; c++) - if (strlen(argv[c]) > max_namelen) - max_namelen = strlen(argv[c]); - - rc = 0; - - for (c = optind; c < argc; c++) { - gmrc = print_gmid(argv[c], max_namelen, show_local_id); - - if (gmrc != GM_SUCCESS) - rc = 1; - } - - return rc; -} diff --git a/lnet/utils/l_ioctl.c b/lnet/utils/l_ioctl.c deleted file mode 100644 index 0bdb7826ee397b9b2fca5dda9de851bad999f4ee..0000000000000000000000000000000000000000 --- a/lnet/utils/l_ioctl.c +++ /dev/null @@ -1,358 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define __USE_FILE_OFFSET64 - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/mman.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <unistd.h> - -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> -#include <libcfs/portals_utils.h> - - -static ioc_handler_t do_ioctl; /* forward ref */ -static ioc_handler_t *current_ioc_handler = &do_ioctl; - -struct ioc_dev { - const char * dev_name; - int dev_fd; - int dev_major; - int dev_minor; -}; - -static struct ioc_dev ioc_dev_list[10]; - -struct dump_hdr { - int magic; - int dev_id; - unsigned int opc; -}; - -char *dump_filename; - -void -set_ioc_handler (ioc_handler_t *handler) -{ - if (handler == NULL) - current_ioc_handler = do_ioctl; - else - current_ioc_handler = handler; -} - -/* Catamount has no <linux/kdev_t.h>, so just define it here */ -#ifndef MKDEV -# define MKDEV(a,b) (((a) << 8) | (b)) -#endif - -static int -open_ioc_dev(int dev_id) -{ - const char * dev_name; - - if (dev_id < 0 || - dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0])) - return -EINVAL; - - dev_name = ioc_dev_list[dev_id].dev_name; - if (dev_name == NULL) { - fprintf(stderr, "unknown device id: %d\n", dev_id); - return -EINVAL; - } - - if (ioc_dev_list[dev_id].dev_fd < 0) { - int fd = open(dev_name, O_RDWR); - - /* Make the /dev/ node if we need to */ - if (fd < 0 && errno == ENOENT) { - if (mknod(dev_name, - S_IFCHR|S_IWUSR|S_IRUSR, - MKDEV(ioc_dev_list[dev_id].dev_major, - ioc_dev_list[dev_id].dev_minor)) == 0) - fd = open(dev_name, O_RDWR); - else - fprintf(stderr, "mknod %s failed: %s\n", - dev_name, strerror(errno)); - } - - if (fd < 0) { - fprintf(stderr, "opening %s failed: %s\n" - "hint: the kernel modules may not be 
loaded\n", - dev_name, strerror(errno)); - return fd; - } - ioc_dev_list[dev_id].dev_fd = fd; - } - - return ioc_dev_list[dev_id].dev_fd; -} - - -static int -do_ioctl(int dev_id, unsigned int opc, void *buf) -{ - int fd, rc; - - fd = open_ioc_dev(dev_id); - if (fd < 0) - return fd; - - rc = ioctl(fd, opc, buf); - return rc; - -} - -static FILE * -get_dump_file() -{ - FILE *fp = NULL; - - if (!dump_filename) { - fprintf(stderr, "no dump filename\n"); - } else - fp = fopen(dump_filename, "a"); - return fp; -} - -/* - * The dump file should start with a description of which devices are - * used, but for now it will assumed whatever app reads the file will - * know what to do. */ -int -dump(int dev_id, unsigned int opc, void *buf) -{ - FILE *fp; - struct dump_hdr dump_hdr; - struct libcfs_ioctl_hdr * ioc_hdr = (struct libcfs_ioctl_hdr *) buf; - int rc; - - printf("dumping opc %x to %s\n", opc, dump_filename); - - - dump_hdr.magic = 0xdeadbeef; - dump_hdr.dev_id = dev_id; - dump_hdr.opc = opc; - - fp = get_dump_file(); - if (fp == NULL) { - fprintf(stderr, "%s: %s\n", dump_filename, - strerror(errno)); - return -EINVAL; - } - - rc = fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp); - if (rc == 1) - rc = fwrite(buf, ioc_hdr->ioc_len, 1, fp); - fclose(fp); - if (rc != 1) { - fprintf(stderr, "%s: %s\n", dump_filename, - strerror(errno)); - return -EINVAL; - } - - return 0; -} - -/* register a device to send ioctls to. 
*/ -int -register_ioc_dev(int dev_id, const char * dev_name, int major, int minor) -{ - - if (dev_id < 0 || - dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0])) - return -EINVAL; - - unregister_ioc_dev(dev_id); - - ioc_dev_list[dev_id].dev_name = dev_name; - ioc_dev_list[dev_id].dev_fd = -1; - ioc_dev_list[dev_id].dev_major = major; - ioc_dev_list[dev_id].dev_minor = minor; - - return dev_id; -} - -void -unregister_ioc_dev(int dev_id) -{ - - if (dev_id < 0 || - dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0])) - return; - if (ioc_dev_list[dev_id].dev_name != NULL && - ioc_dev_list[dev_id].dev_fd >= 0) - close(ioc_dev_list[dev_id].dev_fd); - - ioc_dev_list[dev_id].dev_name = NULL; - ioc_dev_list[dev_id].dev_fd = -1; -} - -/* If this file is set, then all ioctl buffers will be - appended to the file. */ -int -set_ioctl_dump(char * file) -{ - if (dump_filename) - free(dump_filename); - - dump_filename = strdup(file); - if (dump_filename == NULL) - abort(); - - set_ioc_handler(&dump); - return 0; -} - -int -l_ioctl(int dev_id, unsigned int opc, void *buf) -{ - return current_ioc_handler(dev_id, opc, buf); -} - -/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer - * in the file. For example: - * - * parse_dump("lctl.dump", l_ioctl); - * - * Note: if using l_ioctl, then you also need to register_ioc_dev() for - * each device used in the dump. 
- */ -int -parse_dump(char * dump_file, ioc_handler_t ioc_func) -{ - int line =0; - struct stat st; - char *start, *buf, *end; -#ifndef __CYGWIN__ - int fd; -#else - HANDLE fd, hmap; - DWORD size; -#endif - -#ifndef __CYGWIN__ - fd = syscall(SYS_open, dump_file, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "couldn't open %s: %s\n", dump_file, - strerror(errno)); - exit(1); - } - - if (fstat(fd, &st)) { - perror("stat fails"); - exit(1); - } - - if (st.st_size < 1) { - fprintf(stderr, "KML is empty\n"); - exit(1); - } - - start = buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0); - end = start + st.st_size; - close(fd); - if (start == MAP_FAILED) { - fprintf(stderr, "can't create file mapping\n"); - exit(1); - } -#else - fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL, - OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - size = GetFileSize(fd, NULL); - if (size < 1) { - fprintf(stderr, "KML is empty\n"); - exit(1); - } - - hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL); - start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0); - end = buf + size; - CloseHandle(fd); - if (start == NULL) { - fprintf(stderr, "can't create file mapping\n"); - exit(1); - } -#endif /* __CYGWIN__ */ - - while (buf < end) { - struct dump_hdr *dump_hdr = (struct dump_hdr *) buf; - struct libcfs_ioctl_hdr * data; - char tmp[8096]; - int rc; - - line++; - - data = (struct libcfs_ioctl_hdr *) (buf + sizeof(*dump_hdr)); - if (buf + data->ioc_len > end ) { - fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf, - data->ioc_len, end); - return -1; - } -#if 0 - printf ("dump_hdr: %lx data: %lx\n", - (unsigned long)dump_hdr - (unsigned long)buf, (unsigned long)data - (unsigned long)buf); - - printf("%d: opcode %x len: %d ver: %x ", line, dump_hdr->opc, - data->ioc_len, data->ioc_version); -#endif - - memcpy(tmp, data, data->ioc_len); - - rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp); - if (rc) { - printf("failed: %d\n", rc); - exit(1); - } - - buf += 
data->ioc_len + sizeof(*dump_hdr); - } - -#ifndef __CYGWIN__ - munmap(start, end - start); -#else - UnmapViewOfFile(start); - CloseHandle(hmap); -#endif - - return 0; -} - -int -jt_ioc_dump(int argc, char **argv) -{ - if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - return 0; - } - printf("setting dumpfile to: %s\n", argv[1]); - - set_ioctl_dump(argv[1]); - return 0; -} diff --git a/lnet/utils/lbstats b/lnet/utils/lbstats deleted file mode 100755 index a8f08575eb155556664885057cfea9b458a84a66..0000000000000000000000000000000000000000 --- a/lnet/utils/lbstats +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -echo "=== Router Buffers =======" -test -e /proc/sys/lnet/buffers && cat /proc/sys/lnet/buffers -echo -echo "=== NIs ============================================" -test -e /proc/sys/lnet/nis && cat /proc/sys/lnet/nis -echo -echo "=== Peers =============================================================" -test -e /proc/sys/lnet/peers && cat /proc/sys/lnet/peers -echo diff --git a/lnet/utils/lnetunload b/lnet/utils/lnetunload deleted file mode 100755 index d9fd9088411b71ccae325d2890b972953edee815..0000000000000000000000000000000000000000 --- a/lnet/utils/lnetunload +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -lnds=$(echo k{sock,qsw,gm,{open,i,v,o2,c}ib,ra,ptl,mx}lnd) - -if lctl network down > /dev/null 2>&1; then - for mod in $lnds; do - if grep "^$mod" /proc/modules >/dev/null 2>&1; then - rmmod $mod - fi - done - - rmmod lnet - rmmod libcfs -fi diff --git a/lnet/utils/parser.c b/lnet/utils/parser.c deleted file mode 100644 index 2f740c117b0a81896914da4c8e646051ff76607c..0000000000000000000000000000000000000000 --- a/lnet/utils/parser.c +++ /dev/null @@ -1,638 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. 
- * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> -#include <string.h> -#include <stddef.h> -#include <unistd.h> -#include <sys/param.h> -#include <assert.h> -#include <lnet/api-support.h> - -#include "parser.h" - -static command_t * top_level; /* Top level of commands, initialized by - * InitParser */ -static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */ -static int done; /* Set to 1 if user types exit or quit */ - - -/* static functions */ -static char *skipwhitespace(char *s); -static char *skiptowhitespace(char *s); -static command_t *find_cmd(char *name, command_t cmds[], char **next); -static int process(char *s, char **next, command_t *lookup, command_t **result, - char **prev); -static void print_commands(char *str, command_t *table); - -static char * skipwhitespace(char * s) -{ - char * t; - int len; - - len = (int)strlen(s); - for (t = s; t <= s + len && isspace(*t); t++); - return(t); -} - - -static char * skiptowhitespace(char * s) -{ - char * t; - - for (t = s; *t && !isspace(*t); t++); - return(t); -} - -static int line2args(char *line, char **argv, int maxargs) -{ - char *arg; - int i = 0; - - arg = strtok(line, " \t"); - if ( arg ) { - argv[i] = arg; - i++; - } else - return 0; - - while( (arg = strtok(NULL, " \t")) && 
(i <= maxargs)) { - argv[i] = arg; - i++; - } - return i; -} - -/* find a command -- return it if unique otherwise print alternatives */ -static command_t *Parser_findargcmd(char *name, command_t cmds[]) -{ - command_t *cmd; - - for (cmd = cmds; cmd->pc_name; cmd++) { - if (strcmp(name, cmd->pc_name) == 0) - return cmd; - } - return NULL; -} - -int Parser_execarg(int argc, char **argv, command_t cmds[]) -{ - command_t *cmd; - - cmd = Parser_findargcmd(argv[0], cmds); - if ( cmd ) { - int rc = (cmd->pc_func)(argc, argv); - if (rc == CMD_HELP) - fprintf(stderr, "%s\n", cmd->pc_help); - return rc; - } else { - printf("Try interactive use without arguments or use one of:\n"); - for (cmd = cmds; cmd->pc_name; cmd++) - printf("\"%s\" ", cmd->pc_name); - printf("\nas argument.\n"); - } - return -1; -} - -/* returns the command_t * (NULL if not found) corresponding to a - _partial_ match with the first token in name. It sets *next to - point to the following token. Does not modify *name. */ -static command_t * find_cmd(char * name, command_t cmds[], char ** next) -{ - int i, len; - - if (!cmds || !name ) - return NULL; - - /* This sets name to point to the first non-white space character, - and next to the first whitespace after name, len to the length: do - this with strtok*/ - name = skipwhitespace(name); - *next = skiptowhitespace(name); - len = *next - name; - if (len == 0) - return NULL; - - for (i = 0; cmds[i].pc_name; i++) { - if (strncasecmp(name, cmds[i].pc_name, len) == 0) { - *next = skipwhitespace(*next); - return(&cmds[i]); - } - } - return NULL; -} - -/* Recursively process a command line string s and find the command - corresponding to it. This can be ambiguous, full, incomplete, - non-existent. */ -static int process(char *s, char ** next, command_t *lookup, - command_t **result, char **prev) -{ - *result = find_cmd(s, lookup, next); - *prev = s; - - /* non existent */ - if ( ! *result ) - return CMD_NONE; - - /* found entry: is it ambigous, i.e. 
not exact command name and - more than one command in the list matches. Note that find_cmd - points to the first ambiguous entry */ - if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) && - find_cmd(s, (*result) + 1, next)) - return CMD_AMBIG; - - /* found a unique command: component or full? */ - if ( (*result)->pc_func ) { - return CMD_COMPLETE; - } else { - if ( *next == '\0' ) { - return CMD_INCOMPLETE; - } else { - return process(*next, next, (*result)->pc_sub_cmd, result, prev); - } - } -} - -#ifdef HAVE_LIBREADLINE -static command_t * match_tbl; /* Command completion against this table */ -static char * command_generator(const char * text, int state) -{ - static int index, - len; - char *name; - - /* Do we have a match table? */ - if (!match_tbl) - return NULL; - - /* If this is the first time called on this word, state is 0 */ - if (!state) { - index = 0; - len = (int)strlen(text); - } - - /* Return next name in the command list that paritally matches test */ - while ( (name = (match_tbl + index)->pc_name) ) { - index++; - - if (strncasecmp(name, text, len) == 0) { - return(strdup(name)); - } - } - - /* No more matches */ - return NULL; -} - -/* probably called by readline */ -static char **command_completion(char * text, int start, int end) -{ - command_t * table; - char * pos; - - match_tbl = top_level; - - for (table = find_cmd(rl_line_buffer, match_tbl, &pos); - table; table = find_cmd(pos, match_tbl, &pos)) - { - - if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd; - } - - return completion_matches(text, command_generator); -} -#endif - -/* take a string and execute the function or print help */ -int execute_line(char * line) -{ - command_t *cmd, *ambig; - char *prev; - char *next, *tmp; - char *argv[MAXARGS]; - int i; - int rc = 0; - - switch( process(line, &next, top_level, &cmd, &prev) ) { - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, cmd, &tmp)) ) { - 
fprintf(stderr, "%s ", ambig->pc_name); - cmd = ambig + 1; - } - fprintf(stderr, "\n"); - break; - case CMD_NONE: - fprintf(stderr, "No such command, type help\n"); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_COMPLETE: - i = line2args(line, argv, MAXARGS); - rc = (cmd->pc_func)(i, argv); - - if (rc == CMD_HELP) - fprintf(stderr, "%s\n", cmd->pc_help); - - break; - } - - return rc; -} - -int -noop_fn () -{ - return (0); -} - -/* just in case you're ever in an airplane and discover you - forgot to install readline-dev. :) */ -int init_input() -{ - int interactive = isatty (fileno (stdin)); - -#ifdef HAVE_LIBREADLINE - using_history(); - stifle_history(HISTORY); - - if (!interactive) - { - rl_prep_term_function = (rl_vintfunc_t *)noop_fn; - rl_deprep_term_function = (rl_voidfunc_t *)noop_fn; - } - - rl_attempted_completion_function = (CPPFunction *)command_completion; - rl_completion_entry_function = (void *)command_generator; -#endif - return interactive; -} - -#ifndef HAVE_LIBREADLINE -#define add_history(s) -char * readline(char * prompt) -{ - char line[2048]; - int n = 0; - if (prompt) - printf ("%s", prompt); - if (fgets(line, sizeof(line), stdin) == NULL) - return (NULL); - n = strlen(line); - if (n && line[n-1] == '\n') - line[n-1] = '\0'; - return strdup(line); -} -#endif - -/* this is the command execution machine */ -int Parser_commands(void) -{ - char *line, *s; - int rc = 0; - int interactive; - - interactive = init_input(); - - while(!done) { - line = readline(interactive ? 
parser_prompt : NULL); - - if (!line) break; - - s = skipwhitespace(line); - - if (*s) { - add_history(s); - rc = execute_line(s); - } - - free(line); - } - return rc; -} - - -/* sets the parser prompt */ -void Parser_init(char * prompt, command_t * cmds) -{ - done = 0; - top_level = cmds; - if (parser_prompt) free(parser_prompt); - parser_prompt = strdup(prompt); -} - -/* frees the parser prompt */ -void Parser_exit(int argc, char *argv[]) -{ - done = 1; - free(parser_prompt); - parser_prompt = NULL; -} - -/* convert a string to an integer */ -int Parser_int(char *s, int *val) -{ - int ret; - - if (*s != '0') - ret = sscanf(s, "%d", val); - else if (*(s+1) != 'x') - ret = sscanf(s, "%o", val); - else { - s++; - ret = sscanf(++s, "%x", val); - } - - return(ret); -} - - -void Parser_qhelp(int argc, char *argv[]) { - - printf("Available commands are:\n"); - - print_commands(NULL, top_level); - printf("For more help type: help command-name\n"); -} - -int Parser_help(int argc, char **argv) -{ - char line[1024]; - char *next, *prev, *tmp; - command_t *result, *ambig; - int i; - - if ( argc == 1 ) { - Parser_qhelp(argc, argv); - return 0; - } - - line[0]='\0'; - for ( i = 1 ; i < argc ; i++ ) { - strcat(line, argv[i]); - } - - switch ( process(line, &next, top_level, &result, &prev) ) { - case CMD_COMPLETE: - fprintf(stderr, "%s: %s\n",line, result->pc_help); - break; - case CMD_NONE: - fprintf(stderr, "%s: Unknown command.\n", line); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. 
Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; result->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, result, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - result = ambig + 1; - } - fprintf(stderr, "\n"); - break; - } - return 0; -} - - -void Parser_printhelp(char *cmd) -{ - char *argv[] = { "help", cmd }; - Parser_help(2, argv); -} - -/************************************************************************* - * COMMANDS * - *************************************************************************/ - - -static void print_commands(char * str, command_t * table) { - command_t * cmds; - char buf[80]; - - for (cmds = table; cmds->pc_name; cmds++) { - if (cmds->pc_func) { - if (str) printf("\t%s %s\n", str, cmds->pc_name); - else printf("\t%s\n", cmds->pc_name); - } - if (cmds->pc_sub_cmd) { - if (str) { - sprintf(buf, "%s %s", str, cmds->pc_name); - print_commands(buf, cmds->pc_sub_cmd); - } else { - print_commands(cmds->pc_name, cmds->pc_sub_cmd); - } - } - } -} - -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len) -{ - char *line = NULL; - int size = strlen(prompt) + strlen(deft) + 8; - char *theprompt; - theprompt = malloc(size); - assert(theprompt); - - sprintf(theprompt, "%s [%s]: ", prompt, deft); - - line = readline(theprompt); - free(theprompt); - - if ( line == NULL || *line == '\0' ) { - strncpy(res, deft, len); - } else { - strncpy(res, line, len); - } - - if ( line ) { - free(line); - return res; - } else { - return NULL; - } -} - -/* get integer from prompt, loop forever to get it */ -int Parser_getint(const char *prompt, long min, long max, long deft, int base) -{ - int rc; - long result; - char *line; - int size = strlen(prompt) + 40; - char *theprompt = malloc(size); - 
assert(theprompt); - sprintf(theprompt,"%s [%ld, (0x%lx)]: ", prompt, deft, deft); - - fflush(stdout); - - do { - line = NULL; - line = readline(theprompt); - if ( !line ) { - fprintf(stdout, "Please enter an integer.\n"); - fflush(stdout); - continue; - } - if ( *line == '\0' ) { - free(line); - result = deft; - break; - } - rc = Parser_arg2int(line, &result, base); - free(line); - if ( rc != 0 ) { - fprintf(stdout, "Invalid string.\n"); - fflush(stdout); - } else if ( result > max || result < min ) { - fprintf(stdout, "Error: response must lie between %ld and %ld.\n", - min, max); - fflush(stdout); - } else { - break; - } - } while ( 1 ) ; - - if (theprompt) - free(theprompt); - return result; - -} - -/* get boolean (starting with YyNn; loop forever */ -int Parser_getbool(const char *prompt, int deft) -{ - int result = 0; - char *line; - int size = strlen(prompt) + 8; - char *theprompt = malloc(size); - assert(theprompt); - - fflush(stdout); - - if ( deft != 0 && deft != 1 ) { - fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n", - deft); - assert ( 0 ); - } - sprintf(theprompt, "%s [%s]: ", prompt, (deft==0)? "N" : "Y"); - - do { - line = NULL; - line = readline(theprompt); - if ( line == NULL ) { - result = deft; - break; - } - if ( *line == '\0' ) { - result = deft; - break; - } - if ( *line == 'y' || *line == 'Y' ) { - result = 1; - break; - } - if ( *line == 'n' || *line == 'N' ) { - result = 0; - break; - } - if ( line ) - free(line); - fprintf(stdout, "Invalid string. 
Must start with yY or nN\n"); - fflush(stdout); - } while ( 1 ); - - if ( line ) - free(line); - if ( theprompt ) - free(theprompt); - return result; -} - -/* parse int out of a string or prompt for it */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base) -{ - long result; - int rc; - - rc = Parser_arg2int(inp, &result, base); - - if ( rc == 0 ) { - return result; - } else { - return Parser_getint(prompt, deft, min, max, base); - } -} - -/* parse int out of a string or prompt for it */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len) -{ - if ( inp == NULL || *inp == '\0' ) { - return Parser_getstr(prompt, deft, answer, len); - } else - return inp; -} - -/* change a string into a number: return 0 on success. No invalid characters - allowed. The processing of base and validity follows strtol(3)*/ -int Parser_arg2int(const char *inp, long *result, int base) -{ - char *endptr; - - if ( (base !=0) && (base < 2 || base > 36) ) - return 1; - - *result = strtol(inp, &endptr, base); - - if ( *inp != '\0' && *endptr == '\0' ) - return 0; - else - return 1; -} - -int Parser_quit(int argc, char **argv) -{ - argc = argc; - argv = argv; - done = 1; - return 0; -} diff --git a/lnet/utils/parser.h b/lnet/utils/parser.h deleted file mode 100644 index 9e7e95a3b4073503ddeb443b6e7d231cb20ad189..0000000000000000000000000000000000000000 --- a/lnet/utils/parser.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#define HISTORY 100 /* Don't let history grow unbounded */ -#define MAXARGS 512 - -#define CMD_COMPLETE 0 -#define CMD_INCOMPLETE 1 -#define CMD_NONE 2 -#define CMD_AMBIG 3 -#define CMD_HELP 4 - -typedef struct parser_cmd { - char *pc_name; - int (* pc_func)(int, char **); - struct parser_cmd * pc_sub_cmd; - char *pc_help; -} command_t; - -typedef struct argcmd { - char *ac_name; - int (*ac_func)(int, char **); - char *ac_help; -} argcmd_t; - -typedef struct network 
{ - char *type; - char *server; - int port; -} network_t; - -int Parser_quit(int argc, char **argv); -void Parser_init(char *, command_t *); /* Set prompt and load command list */ -int Parser_commands(void); /* Start the command parser */ -void Parser_qhelp(int, char **); /* Quick help routine */ -int Parser_help(int, char **); /* Detailed help routine */ -void Parser_printhelp(char *); /* Detailed help routine */ -void Parser_exit(int, char **); /* Shuts down command parser */ -int Parser_execarg(int argc, char **argv, command_t cmds[]); -int execute_line(char * line); - -/* Converts a string to an integer */ -int Parser_int(char *, int *); - -/* Prompts for a string, with default values and a maximum length */ -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len); - -/* Prompts for an integer, with minimum, maximum and default values and base */ -int Parser_getint(const char *prompt, long min, long max, long deft, - int base); - -/* Prompts for a yes/no, with default */ -int Parser_getbool(const char *prompt, int deft); - -/* Extracts an integer from a string, or prompts if it cannot get one */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base); - -/* Extracts a word from the input, or propmts if it cannot get one */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len); - -/* Extracts an integer from a string with a base */ -int Parser_arg2int(const char *inp, long *result, int base); - -#endif diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c deleted file mode 100644 index cefb095b24ab460e39b1c662c96eb926d5d1fa61..0000000000000000000000000000000000000000 --- a/lnet/utils/portals.c +++ /dev/null @@ -1,1762 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdio.h> -#include <sys/types.h> -#ifdef HAVE_NETDB_H -#include <netdb.h> -#endif -#include <sys/socket.h> -#ifdef HAVE_NETINET_TCP_H -#include <netinet/tcp.h> -#endif -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#ifdef HAVE_SYS_IOCTL_H -#include <sys/ioctl.h> -#endif -#ifndef _IOWR -#include "ioctl.h" -#endif -#include <errno.h> -#include <unistd.h> -#include <time.h> -#include <stdarg.h> -#ifdef HAVE_ENDIAN_H -#include <endian.h> -#endif - -#include <libcfs/portals_utils.h> -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> -#include <lnet/socklnd.h> -#include "parser.h" - -unsigned int libcfs_debug; -unsigned int libcfs_printk; - -static int g_net_set; -static __u32 g_net; - -/* Convert a string boolean to an int; "enable" -> 1 */ -int -lnet_parse_bool (int *b, char *str) -{ - if (!strcasecmp (str, "no") || - !strcasecmp (str, "n") || - !strcasecmp (str, "off") || - !strcasecmp (str, "down") || - !strcasecmp (str, "disable")) - { - *b = 0; - return (0); - } - - if (!strcasecmp (str, "yes") || - !strcasecmp (str, "y") || - !strcasecmp (str, "on") || - !strcasecmp (str, "up") || - !strcasecmp (str, "enable")) - { - *b = 1; - return (0); - } - - return (-1); -} - -int -lnet_parse_port (int *port, char *str) -{ - char *end; - 
- *port = strtol (str, &end, 0); - - if (*end == 0 && /* parsed whole string */ - *port > 0 && *port < 65536) /* minimal sanity check */ - return (0); - - return (-1); -} - -#ifdef HAVE_GETHOSTBYNAME -static struct hostent * -ptl_gethostbyname(char * hname) { - struct hostent *he; - he = gethostbyname(hname); - if (!he) { - switch(h_errno) { - case HOST_NOT_FOUND: - case NO_ADDRESS: - fprintf(stderr, "Unable to resolve hostname: %s\n", - hname); - break; - default: - fprintf(stderr, "gethostbyname error for %s: %s\n", - hname, strerror(h_errno)); - break; - } - return NULL; - } - return he; -} -#endif - -int -lnet_parse_ipquad (__u32 *ipaddrp, char *str) -{ - int a; - int b; - int c; - int d; - - if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 && - (a & ~0xff) == 0 && (b & ~0xff) == 0 && - (c & ~0xff) == 0 && (d & ~0xff) == 0) - { - *ipaddrp = (a<<24)|(b<<16)|(c<<8)|d; - return (0); - } - - return (-1); -} - -int -lnet_parse_ipaddr (__u32 *ipaddrp, char *str) -{ -#ifdef HAVE_GETHOSTBYNAME - struct hostent *he; -#endif - - if (!strcmp (str, "_all_")) { - *ipaddrp = 0; - return (0); - } - - if (lnet_parse_ipquad(ipaddrp, str) == 0) - return (0); - -#ifdef HAVE_GETHOSTBYNAME - if ((('a' <= str[0] && str[0] <= 'z') || - ('A' <= str[0] && str[0] <= 'Z')) && - (he = ptl_gethostbyname (str)) != NULL) { - __u32 addr = *(__u32 *)he->h_addr; - - *ipaddrp = ntohl(addr); /* HOST byte order */ - return (0); - } -#endif - - return (-1); -} - -char * -ptl_ipaddr_2_str (__u32 ipaddr, char *str, int lookup) -{ -#ifdef HAVE_GETHOSTBYNAME - __u32 net_ip; - struct hostent *he; - - if (lookup) { - net_ip = htonl (ipaddr); - he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET); - if (he != NULL) { - strcpy(str, he->h_name); - return (str); - } - } -#endif - - sprintf (str, "%d.%d.%d.%d", - (ipaddr >> 24) & 0xff, (ipaddr >> 16) & 0xff, - (ipaddr >> 8) & 0xff, ipaddr & 0xff); - return (str); -} - -int -lnet_parse_time (time_t *t, char *str) -{ - char *end; - int n; - struct tm tm; 
- - *t = strtol (str, &end, 0); - if (*end == 0) /* parsed whole string */ - return (0); - - memset (&tm, 0, sizeof (tm)); - n = sscanf (str, "%d-%d-%d-%d:%d:%d", - &tm.tm_year, &tm.tm_mon, &tm.tm_mday, - &tm.tm_hour, &tm.tm_min, &tm.tm_sec); - if (n != 6) - return (-1); - - tm.tm_mon--; /* convert to 0 == Jan */ - tm.tm_year -= 1900; /* y2k quirk */ - tm.tm_isdst = -1; /* dunno if it's daylight savings... */ - - *t = mktime (&tm); - if (*t == (time_t)-1) - return (-1); - - return (0); -} - -int g_net_is_set (char *cmd) -{ - if (g_net_set) - return 1; - - if (cmd != NULL) - fprintf(stderr, - "You must run the 'network' command before '%s'.\n", - cmd); - return 0; -} - -int g_net_is_compatible (char *cmd, ...) -{ - va_list ap; - int nal; - - if (!g_net_is_set(cmd)) - return 0; - - va_start(ap, cmd); - - do { - nal = va_arg (ap, int); - if (nal == LNET_NETTYP(g_net)) { - va_end (ap); - return 1; - } - } while (nal != 0); - - va_end (ap); - - if (cmd != NULL) - fprintf (stderr, - "Command %s not compatible with %s NAL\n", - cmd, - libcfs_lnd2str(LNET_NETTYP(g_net))); - return 0; -} - -int ptl_initialize(int argc, char **argv) -{ - register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH, - LNET_DEV_MAJOR, LNET_DEV_MINOR); - return 0; -} - - -int jt_ptl_network(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - __u32 net = LNET_NIDNET(LNET_NID_ANY); - int rc; - - if (argc < 2) { - fprintf(stderr, "usage: %s <net>|up|down\n", argv[0]); - return 0; - } - - if (!strcmp(argv[1], "unconfigure") || - !strcmp(argv[1], "down")) { - LIBCFS_IOC_INIT(data); - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_UNCONFIGURE, &data); - - if (rc == 0) { - printf ("LNET ready to unload\n"); - return 0; - } - - if (errno == EBUSY) - fprintf(stderr, "LNET busy\n"); - else - fprintf(stderr, "LNET unconfigure error %d: %s\n", - errno, strerror(errno)); - return -1; - } - - if (!strcmp(argv[1], "configure") || - !strcmp(argv[1], "up")) { - LIBCFS_IOC_INIT(data); - rc = l_ioctl(LNET_DEV_ID, 
IOC_LIBCFS_CONFIGURE, &data); - - if (rc == 0) { - printf ("LNET configured\n"); - return 0; - } - - fprintf(stderr, "LNET configure error %d: %s\n", - errno, strerror(errno)); - return -1; - } - - net = libcfs_str2net(argv[1]); - if (net == LNET_NIDNET(LNET_NID_ANY)) { - fprintf(stderr, "Can't parse net %s\n", argv[1]); - return -1; - } - - g_net_set = 1; - g_net = net; - return 0; -} - -int -jt_ptl_list_nids(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int all = 0, return_nid = 0; - int count; - int rc; - - all = (argc == 2) && (strcmp(argv[1], "all") == 0); - /* Hack to pass back value */ - return_nid = (argc == 2) && (argv[1][0] == 1); - - if ((argc > 2) && !(all || return_nid)) { - fprintf(stderr, "usage: %s [all]\n", argv[0]); - return 0; - } - - for (count = 0;; count++) { - LIBCFS_IOC_INIT (data); - data.ioc_count = count; - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_NI, &data); - - if (rc < 0) { - if ((count > 0) && (errno == ENOENT)) - /* We found them all */ - break; - fprintf(stderr,"IOC_LIBCFS_GET_NI error %d: %s\n", - errno, strerror(errno)); - return -1; - } - - if (all || (LNET_NETTYP(LNET_NIDNET(data.ioc_nid)) != LOLND)) { - printf("%s\n", libcfs_nid2str(data.ioc_nid)); - if (return_nid) { - *(__u64 *)(argv[1]) = data.ioc_nid; - return_nid--; - } - } - } - - return 0; -} - -int -jt_ptl_which_nid (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int best_dist = 0; - int best_order = 0; - lnet_nid_t best_nid = LNET_NID_ANY; - int dist; - int order; - lnet_nid_t nid; - char *nidstr; - int rc; - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s NID [NID...]\n", argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) { - nidstr = argv[i]; - nid = libcfs_str2nid(nidstr); - if (nid == LNET_NID_ANY) { - fprintf(stderr, "Can't parse NID %s\n", nidstr); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LNET_DIST, &data); - if (rc != 0) { - fprintf(stderr, "Can't get 
distance to %s: %s\n", - nidstr, strerror(errno)); - return -1; - } - - dist = data.ioc_u32[0]; - order = data.ioc_u32[1]; - - if (dist < 0) { - if (dist == -EHOSTUNREACH) - continue; - - fprintf(stderr, "Unexpected distance to %s: %d\n", - nidstr, dist); - return -1; - } - - if (best_nid == LNET_NID_ANY || - dist < best_dist || - (dist == best_dist && order < best_order)) { - best_dist = dist; - best_order = order; - best_nid = nid; - } - } - - if (best_nid == LNET_NID_ANY) { - fprintf(stderr, "No reachable NID\n"); - return -1; - } - - printf("%s\n", libcfs_nid2str(best_nid)); - return 0; -} - -int -jt_ptl_print_interfaces (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - char buffer[3][64]; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, 0)) - return -1; - - for (index = 0;;index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_INTERFACE, &data); - if (rc != 0) - break; - - printf ("%s: (%s/%s) npeer %d nroute %d\n", - ptl_ipaddr_2_str(data.ioc_u32[0], buffer[2], 1), - ptl_ipaddr_2_str(data.ioc_u32[0], buffer[0], 0), - ptl_ipaddr_2_str(data.ioc_u32[1], buffer[1], 0), - data.ioc_u32[2], data.ioc_u32[3]); - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("<no interfaces>\n"); - } else { - fprintf(stderr, "Error getting interfaces: %s: " - "check dmesg.\n", - strerror(errno)); - } - } - - return 0; -} - -int -jt_ptl_add_interface (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - __u32 ipaddr; - int rc; - __u32 netmask = 0xffffff00; - int i; - int count; - char *end; - - if (argc < 2 || argc > 3) { - fprintf (stderr, "usage: %s ipaddr [netmask]\n", argv[0]); - return 0; - } - - if (!g_net_is_compatible(argv[0], SOCKLND, 0)) - return -1; - - if (lnet_parse_ipaddr(&ipaddr, argv[1]) != 0) { - fprintf (stderr, "Can't parse ip: %s\n", argv[1]); - return -1; - } - - if (argc > 2 ) { - count = strtol(argv[2], &end, 0); - if (count > 0 && 
count < 32 && *end == 0) { - netmask = 0; - for (i = count; i > 0; i--) - netmask = netmask|(1<<(32-i)); - } else if (lnet_parse_ipquad(&netmask, argv[2]) != 0) { - fprintf (stderr, "Can't parse netmask: %s\n", argv[2]); - return -1; - } - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_u32[0] = ipaddr; - data.ioc_u32[1] = netmask; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_INTERFACE, &data); - if (rc != 0) { - fprintf (stderr, "failed to add interface: %s\n", - strerror (errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_del_interface (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int rc; - __u32 ipaddr = 0; - - if (argc > 2) { - fprintf (stderr, "usage: %s [ipaddr]\n", argv[0]); - return 0; - } - - if (!g_net_is_compatible(argv[0], SOCKLND, 0)) - return -1; - - if (argc == 2 && - lnet_parse_ipaddr(&ipaddr, argv[1]) != 0) { - fprintf (stderr, "Can't parse ip: %s\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_u32[0] = ipaddr; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_INTERFACE, &data); - if (rc != 0) { - fprintf (stderr, "failed to delete interface: %s\n", - strerror (errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_print_peers (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_process_id_t id; - char buffer[2][64]; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, PTLLND, MXLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) - return -1; - - for (index = 0;;index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_PEER, &data); - if (rc != 0) - break; - - if (g_net_is_compatible(NULL, SOCKLND, 0)) { - id.nid = data.ioc_nid; - id.pid = data.ioc_u32[4]; - printf ("%-20s [%d]%s->%s:%d #%d\n", - libcfs_id2str(id), - data.ioc_count, /* persistence */ - ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* my ip */ - ptl_ipaddr_2_str (data.ioc_u32[0], 
buffer[1], 1), /* peer ip */ - data.ioc_u32[1], /* peer port */ - data.ioc_u32[3]); /* conn_count */ - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - id.nid = data.ioc_nid; - id.pid = data.ioc_u32[4]; - printf ("%-20s s %d%s [%d] "LPD64".%06d" - " m "LPD64"/"LPD64" q %d/%d c %d/%d\n", - libcfs_id2str(id), - data.ioc_net, /* state */ - data.ioc_flags ? "" : " ~!h", /* sent_hello */ - data.ioc_count, /* refcount */ - data.ioc_u64[0]/1000000, /* incarnation secs */ - (int)(data.ioc_u64[0]%1000000), /* incarnation usecs */ - (((__u64)data.ioc_u32[1])<<32) | - ((__u64)data.ioc_u32[0]), /* next_matchbits */ - (((__u64)data.ioc_u32[3])<<32) | - ((__u64)data.ioc_u32[2]), /* last_matchbits_seen */ - data.ioc_u32[5] >> 16, /* nsendq */ - data.ioc_u32[5] & 0xffff, /* nactiveq */ - data.ioc_u32[6] >> 16, /* credits */ - data.ioc_u32[6] & 0xffff); /* outstanding_credits */ - } else if (g_net_is_compatible(NULL, RALND, OPENIBLND, CIBLND, VIBLND, 0)) { - printf ("%-20s [%d]@%s:%d\n", - libcfs_nid2str(data.ioc_nid), /* peer nid */ - data.ioc_count, /* peer persistence */ - ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* peer ip */ - data.ioc_u32[1]); /* peer port */ - } else { - printf ("%-20s [%d]\n", - libcfs_nid2str(data.ioc_nid), data.ioc_count); - } - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("<no peers>\n"); - } else { - fprintf(stderr, "Error getting peer list: %s: " - "check dmesg.\n", - strerror(errno)); - } - } - return 0; -} - -int -jt_ptl_add_peer (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid; - __u32 ip = 0; - int port = 0; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, - OPENIBLND, CIBLND, IIBLND, VIBLND, 0)) - return -1; - - if (g_net_is_compatible(NULL, SOCKLND, OPENIBLND, CIBLND, RALND, 0)) { - if (argc != 4) { - fprintf (stderr, "usage(tcp,openib,cib,ra): %s nid ipaddr port\n", - argv[0]); - return 0; - } - } else if (g_net_is_compatible(NULL, VIBLND, 0)) { - if (argc != 3) { - fprintf 
(stderr, "usage(vib): %s nid ipaddr\n", - argv[0]); - return 0; - } - } else if (argc != 2) { - fprintf (stderr, "usage(iib): %s nid\n", argv[0]); - return 0; - } - - nid = libcfs_str2nid(argv[1]); - if (nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse NID: %s\n", argv[1]); - return -1; - } - - if (g_net_is_compatible (NULL, SOCKLND, OPENIBLND, CIBLND, VIBLND, RALND, 0) && - lnet_parse_ipaddr (&ip, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]); - return -1; - } - - if (g_net_is_compatible (NULL, SOCKLND, OPENIBLND, CIBLND, RALND, 0) && - lnet_parse_port (&port, argv[3]) != 0) { - fprintf (stderr, "Can't parse port: %s\n", argv[3]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_nid = nid; - data.ioc_u32[0] = ip; - data.ioc_u32[1] = port; - - rc = l_ioctl (LNET_DEV_ID, IOC_LIBCFS_ADD_PEER, &data); - if (rc != 0) { - fprintf (stderr, "failed to add peer: %s\n", - strerror (errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_del_peer (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid = LNET_NID_ANY; - lnet_pid_t pid = LNET_PID_ANY; - __u32 ip = 0; - char *end; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, PTLLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) - return -1; - - if (g_net_is_compatible(NULL, SOCKLND, 0)) { - if (argc > 3) { - fprintf (stderr, "usage: %s [nid] [ipaddr]\n", - argv[0]); - return 0; - } - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - if (argc > 3) { - fprintf (stderr, "usage: %s [nid] [pid]\n", - argv[0]); - return 0; - } - } else if (argc > 2) { - fprintf (stderr, "usage: %s [nid]\n", argv[0]); - return 0; - } - - if (argc > 1 && - !libcfs_str2anynid(&nid, argv[1])) { - fprintf (stderr, "Can't parse nid: %s\n", argv[1]); - return -1; - } - - if (g_net_is_compatible(NULL, SOCKLND, 0)) { - if (argc > 2 && - lnet_parse_ipaddr (&ip, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr: %s\n", - argv[2]); - 
return -1; - } - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - if (argc > 2) { - pid = strtol(argv[2], &end, 0); - if (end == argv[2] || *end == 0) { - fprintf(stderr, "Can't parse pid %s\n", - argv[2]); - return -1; - } - } - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_nid = nid; - data.ioc_u32[0] = ip; - data.ioc_u32[1] = pid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_PEER, &data); - if (rc != 0) { - fprintf (stderr, "failed to remove peer: %s\n", - strerror (errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_print_connections (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_process_id_t id; - char buffer[2][64]; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) - return -1; - - for (index = 0; ; index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_CONN, &data); - if (rc != 0) - break; - - if (g_net_is_compatible (NULL, SOCKLND, 0)) { - id.nid = data.ioc_nid; - id.pid = data.ioc_u32[6]; - printf ("%-20s %s[%d]%s->%s:%d %d/%d %s\n", - libcfs_id2str(id), - (data.ioc_u32[3] == SOCKLND_CONN_ANY) ? "A" : - (data.ioc_u32[3] == SOCKLND_CONN_CONTROL) ? "C" : - (data.ioc_u32[3] == SOCKLND_CONN_BULK_IN) ? "I" : - (data.ioc_u32[3] == SOCKLND_CONN_BULK_OUT) ? "O" : "?", - data.ioc_u32[4], /* scheduler */ - ptl_ipaddr_2_str (data.ioc_u32[2], buffer[0], 1), /* local IP addr */ - ptl_ipaddr_2_str (data.ioc_u32[0], buffer[1], 1), /* remote IP addr */ - data.ioc_u32[1], /* remote port */ - data.ioc_count, /* tx buffer size */ - data.ioc_u32[5], /* rx buffer size */ - data.ioc_flags ? 
"nagle" : "nonagle"); - } else if (g_net_is_compatible (NULL, RALND, 0)) { - printf ("%-20s [%d]\n", - libcfs_nid2str(data.ioc_nid), - data.ioc_u32[0] /* device id */); - } else { - printf ("%s\n", libcfs_nid2str(data.ioc_nid)); - } - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("<no connections>\n"); - } else { - fprintf(stderr, "Error getting connection list: %s: " - "check dmesg.\n", - strerror(errno)); - } - } - return 0; -} - -int jt_ptl_disconnect(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid = LNET_NID_ANY; - __u32 ipaddr = 0; - int rc; - - if (argc > 3) { - fprintf(stderr, "usage: %s [nid] [ipaddr]\n", argv[0]); - return 0; - } - - if (!g_net_is_compatible (NULL, SOCKLND, RALND, MXLND, - OPENIBLND, CIBLND, IIBLND, VIBLND, O2IBLND, 0)) - return 0; - - if (argc >= 2 && - !libcfs_str2anynid(&nid, argv[1])) { - fprintf (stderr, "Can't parse nid %s\n", argv[1]); - return -1; - } - - if (g_net_is_compatible (NULL, SOCKLND, 0) && - argc >= 3 && - lnet_parse_ipaddr (&ipaddr, argv[2]) != 0) { - fprintf (stderr, "Can't parse ip addr %s\n", argv[2]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_nid = nid; - data.ioc_u32[0] = ipaddr; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_CLOSE_CONNECTION, &data); - if (rc != 0) { - fprintf(stderr, "failed to remove connection: %s\n", - strerror(errno)); - return -1; - } - - return 0; -} - -int jt_ptl_push_connection (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int rc; - lnet_nid_t nid = LNET_NID_ANY; - - if (argc > 2) { - fprintf(stderr, "usage: %s [nid]\n", argv[0]); - return 0; - } - - if (!g_net_is_compatible (argv[0], SOCKLND, 0)) - return -1; - - if (argc > 1 && - !libcfs_str2anynid(&nid, argv[1])) { - fprintf(stderr, "Can't parse nid: %s\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PUSH_CONNECTION, &data); - if (rc != 0) { - 
fprintf(stderr, "failed to push connection: %s\n", - strerror(errno)); - return -1; - } - - return 0; -} - -int -jt_ptl_print_active_txs (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int index; - int rc; - - if (!g_net_is_compatible (argv[0], QSWLND, 0)) - return -1; - - for (index = 0;;index++) { - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_TXDESC, &data); - if (rc != 0) - break; - - printf ("type %u payload %6d to %s via %s by pid %6d: " - "%s, %s, state %d\n", - data.ioc_u32[0], - data.ioc_count, - libcfs_nid2str(data.ioc_nid), - libcfs_nid2str(data.ioc_u64[0]), - data.ioc_u32[1], - (data.ioc_flags & 1) ? "delayed" : "immediate", - (data.ioc_flags & 2) ? "nblk" : "normal", - data.ioc_flags >> 2); - } - - if (index == 0) { - if (errno == ENOENT) { - printf ("<no active descs>\n"); - } else { - fprintf(stderr, "Error getting active transmits list: " - "%s: check dmesg.\n", - strerror(errno)); - } - } - return 0; -} - -int jt_ptl_ping_test(int argc, char **argv) -{ - int rc; - lnet_nid_t nid; - long count = 1; - long size = 4; - long timeout = 1; - struct libcfs_ioctl_data data; - - if (argc < 2) { - fprintf(stderr, "usage: %s nid [count] [size] [timeout (secs)]\n", argv[0]); - return 0; - } - - nid = libcfs_str2nid(argv[1]); - if (nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return (-1); - } - - if (argc > 2) - { - count = atol(argv[2]); - - if (count < 0 || count > 20000) - { - fprintf(stderr, "are you insane? 
%ld is a crazy count.\n", count); - return -1; - } - } - - if (argc > 3) - size= atol(argv[3]); - - if (argc > 4) - timeout = atol (argv[4]); - - LIBCFS_IOC_INIT (data); - data.ioc_count = count; - data.ioc_nid = nid; - data.ioc_u32[0] = size; - data.ioc_u32[1] = timeout; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PING_TEST, &data); - if (rc) { - fprintf(stderr, "failed to start pinger: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -int jt_ptl_ping(int argc, char **argv) -{ - int rc; - int timeout; - lnet_process_id_t id; - lnet_process_id_t ids[16]; - int maxids = sizeof(ids)/sizeof(ids[0]); - struct libcfs_ioctl_data data; - char *sep; - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s id [timeout (secs)]\n", argv[0]); - return 0; - } - - sep = strchr(argv[1], '-'); - if (sep == NULL) { - id.pid = LNET_PID_ANY; - id.nid = libcfs_str2nid(argv[1]); - if (id.nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return -1; - } - } else { - char *end; - - if (argv[1][0] == 'u' || - argv[1][0] == 'U') - id.pid = strtoul(&argv[1][1], &end, 0) | LNET_PID_USERFLAG; - else - id.pid = strtoul(argv[1], &end, 0); - - id.nid = libcfs_str2nid(sep + 1); - - if (end != sep || - id.nid == LNET_NID_ANY) { - fprintf(stderr, "Can't parse process id \"%s\"\n", argv[1]); - return -1; - } - } - - if (argc > 2) - timeout = 1000 * atol(argv[2]); - else - timeout = 1000; /* default 1 second timeout */ - - LIBCFS_IOC_INIT (data); - data.ioc_nid = id.nid; - data.ioc_u32[0] = id.pid; - data.ioc_u32[1] = timeout; - data.ioc_plen1 = sizeof(ids); - data.ioc_pbuf1 = (char *)ids; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_PING, &data); - if (rc != 0) { - fprintf(stderr, "failed to ping %s: %s\n", - id.pid == LNET_PID_ANY ? 
- libcfs_nid2str(id.nid) : libcfs_id2str(id), - strerror(errno)); - return -1; - } - - for (i = 0; i < data.ioc_count && i < maxids; i++) - printf("%s\n", libcfs_id2str(ids[i])); - - if (data.ioc_count > maxids) - printf("%d out of %d ids listed\n", maxids, data.ioc_count); - - return 0; -} - -int jt_ptl_mynid(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid; - int rc; - - if (argc != 2) { - fprintf(stderr, "usage: %s NID\n", argv[0]); - return 0; - } - - nid = libcfs_str2nid(argv[1]); - if (nid == LNET_NID_ANY) { - fprintf(stderr, "Can't parse NID '%s'\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = LNET_NIDNET(nid); - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_REGISTER_MYNID, &data); - if (rc < 0) - fprintf(stderr, "setting my NID failed: %s\n", - strerror(errno)); - else - printf("registered my nid %s\n", libcfs_nid2str(nid)); - - return 0; -} - -int -jt_ptl_fail_nid (int argc, char **argv) -{ - int rc; - lnet_nid_t nid; - unsigned int threshold; - struct libcfs_ioctl_data data; - - if (argc < 2 || argc > 3) - { - fprintf (stderr, "usage: %s nid|\"*\" [count (0 == mend)]\n", argv[0]); - return (0); - } - - if (!libcfs_str2anynid(&nid, argv[1])) - { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return (-1); - } - - if (argc < 3) { - threshold = LNET_MD_THRESH_INF; - } else if (sscanf (argv[2], "%i", &threshold) != 1) { - fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]); - return (-1); - } - - LIBCFS_IOC_INIT (data); - data.ioc_nid = nid; - data.ioc_count = threshold; - - rc = l_ioctl (LNET_DEV_ID, IOC_LIBCFS_FAIL_NID, &data); - if (rc < 0) - fprintf (stderr, "IOC_LIBCFS_FAIL_NID failed: %s\n", - strerror (errno)); - else - printf ("%s %s\n", threshold == 0 ? 
"Unfailing" : "Failing", argv[1]); - - return (0); -} - -int -jt_ptl_add_route (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t gateway_nid; - unsigned int hops = 1; - char *end; - int rc; - - if (argc < 2 || argc > 3) - { - fprintf (stderr, "usage: %s gateway [hopcount]\n", argv[0]); - return (0); - } - - if (!g_net_is_set(argv[0])) - return (-1); - - gateway_nid = libcfs_str2nid(argv[1]); - if (gateway_nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]); - return (-1); - } - - if (argc == 3) { - hops = strtoul(argv[2], &end, 0); - if (hops >= 256 || *end != 0) { - fprintf (stderr, "Can't parse hopcount \"%s\"\n", argv[2]); - return -1; - } - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net; - data.ioc_count = hops; - data.ioc_nid = gateway_nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_ROUTE, &data); - if (rc != 0) { - fprintf (stderr, "IOC_LIBCFS_ADD_ROUTE failed: %s\n", strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_del_route (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - lnet_nid_t nid; - int rc; - - if (argc != 2) { - fprintf (stderr, "usage: %s gatewayNID\n", argv[0]); - return (0); - } - - if (!libcfs_str2anynid(&nid, argv[1])) { - fprintf (stderr, "Can't parse gateway NID " - "\"%s\"\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_net = g_net_set ? 
g_net : LNET_NIDNET(LNET_NID_ANY); - data.ioc_nid = nid; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_ROUTE, &data); - if (rc != 0) { - fprintf (stderr, "IOC_LIBCFS_DEL_ROUTE (%s) failed: %s\n", - libcfs_nid2str(nid), strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_notify_router (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int enable; - lnet_nid_t nid; - int rc; - struct timeval now; - time_t when; - - if (argc < 3) - { - fprintf (stderr, "usage: %s targetNID <up/down> [<time>]\n", - argv[0]); - return (0); - } - - nid = libcfs_str2nid(argv[1]); - if (nid == LNET_NID_ANY) { - fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]); - return (-1); - } - - if (lnet_parse_bool (&enable, argv[2]) != 0) { - fprintf (stderr, "Can't parse boolean %s\n", argv[2]); - return (-1); - } - - gettimeofday(&now, NULL); - - if (argc < 4) { - when = now.tv_sec; - } else if (lnet_parse_time (&when, argv[3]) != 0) { - fprintf(stderr, "Can't parse time %s\n" - "Please specify either 'YYYY-MM-DD-HH:MM:SS'\n" - "or an absolute unix time in seconds\n", argv[3]); - return (-1); - } else if (when > now.tv_sec) { - fprintf (stderr, "%s specifies a time in the future\n", - argv[3]); - return (-1); - } - - LIBCFS_IOC_INIT(data); - data.ioc_nid = nid; - data.ioc_flags = enable; - /* Yeuch; 'cept I need a __u64 on 64 bit machines... 
*/ - data.ioc_u64[0] = (__u64)when; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_NOTIFY_ROUTER, &data); - if (rc != 0) { - fprintf (stderr, "IOC_LIBCFS_NOTIFY_ROUTER (%s) failed: %s\n", - libcfs_nid2str(nid), strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_print_routes (int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int rc; - int index; - __u32 net; - lnet_nid_t nid; - unsigned int hops; - int alive; - - for (index = 0;;index++) - { - LIBCFS_IOC_INIT(data); - data.ioc_count = index; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_ROUTE, &data); - if (rc != 0) - break; - - net = data.ioc_net; - hops = data.ioc_count; - nid = data.ioc_nid; - alive = data.ioc_flags; - - printf ("net %18s hops %u gw %32s %s\n", - libcfs_net2str(net), hops, - libcfs_nid2str(nid), alive ? "up" : "down"); - } - - if (errno != ENOENT) - fprintf(stderr, "Error getting routes: %s: check dmesg.\n", - strerror(errno)); - - return (0); -} - -static int -lwt_control(int enable, int clear) -{ - struct libcfs_ioctl_data data; - int rc; - - LIBCFS_IOC_INIT(data); - data.ioc_flags = (enable ? 1 : 0) | (clear ? 
2 : 0); - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_CONTROL, &data); - if (rc == 0) - return (0); - - fprintf(stderr, "IOC_LIBCFS_LWT_CONTROL failed: %s\n", - strerror(errno)); - return (-1); -} - -static int -lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, - lwt_event_t *events, int size) -{ - struct libcfs_ioctl_data data; - int rc; - - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = (char *)events; - data.ioc_plen1 = size; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_SNAPSHOT, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_SNAPSHOT failed: %s\n", - strerror(errno)); - return (-1); - } - - /* crappy overloads */ - if (data.ioc_u32[2] != sizeof(lwt_event_t) || - data.ioc_u32[3] != offsetof(lwt_event_t, lwte_where)) { - fprintf(stderr,"kernel/user LWT event mismatch %d(%d),%d(%d)\n", - (int)data.ioc_u32[2], (int)sizeof(lwt_event_t), - (int)data.ioc_u32[3], - (int)offsetof(lwt_event_t, lwte_where)); - return (-1); - } - - if (now != NULL) - *now = data.ioc_u64[0]; - - LASSERT (data.ioc_u32[0] != 0); - if (ncpu != NULL) - *ncpu = data.ioc_u32[0]; - - LASSERT (data.ioc_u32[1] != 0); - if (totalsize != NULL) - *totalsize = data.ioc_u32[1]; - - return (0); -} - -static char * -lwt_get_string(char *kstr) -{ - char *ustr; - struct libcfs_ioctl_data data; - int size; - int rc; - - /* FIXME: this could maintain a symbol table since we expect to be - * looking up the same strings all the time... 
*/ - - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = kstr; - data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */ - data.ioc_pbuf2 = NULL; - data.ioc_plen2 = 0; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_LOOKUP_STRING, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_LOOKUP_STRING failed: %s\n", - strerror(errno)); - return (NULL); - } - - size = data.ioc_count; - ustr = (char *)malloc(size); - if (ustr == NULL) { - fprintf(stderr, "Can't allocate string storage of size %d\n", - size); - return (NULL); - } - - LIBCFS_IOC_INIT(data); - data.ioc_pbuf1 = kstr; - data.ioc_plen1 = 1; /* non-zero just to fool portal_ioctl_is_invalid() */ - data.ioc_pbuf2 = ustr; - data.ioc_plen2 = size; - - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_LWT_LOOKUP_STRING, &data); - if (rc != 0) { - fprintf(stderr, "IOC_LIBCFS_LWT_LOOKUP_STRING failed: %s\n", - strerror(errno)); - return (NULL); - } - - LASSERT(strlen(ustr) == size - 1); - return (ustr); -} - -static void -lwt_put_string(char *ustr) -{ - free(ustr); -} - -static int -lwt_print(FILE *f, cycles_t t0, cycles_t tlast, double mhz, int cpu, lwt_event_t *e) -{ -#ifndef __WORDSIZE -# error "__WORDSIZE not defined" -#elif __WORDSIZE == 32 -# define XFMT "%#010lx" -#elif __WORDSIZE== 64 -# define XFMT "%#018lx" -#else -# error "Unexpected __WORDSIZE" -#endif - char *where = lwt_get_string(e->lwte_where); - - if (where == NULL) - return (-1); - - fprintf(f, XFMT" "XFMT" "XFMT" "XFMT": "XFMT" %2d %10.6f %10.2f %s\n", - e->lwte_p1, e->lwte_p2, e->lwte_p3, e->lwte_p4, - (long)e->lwte_task, cpu, (e->lwte_when - t0) / (mhz * 1000000.0), - (t0 == e->lwte_when) ? 
0.0 : (e->lwte_when - tlast) / mhz, - where); - - lwt_put_string(where); - - return (0); -#undef XFMT -} - -double -get_cycles_per_usec () -{ - FILE *f = fopen ("/proc/cpuinfo", "r"); - double mhz; - char line[64]; - - if (f != NULL) { - while (fgets (line, sizeof (line), f) != NULL) - if (sscanf (line, "cpu MHz : %lf", &mhz) == 1) { - fclose (f); - return (mhz); - } - fclose (f); - } - - fprintf (stderr, "Can't read/parse /proc/cpuinfo\n"); - return (1000.0); -} - -int -jt_ptl_lwt(int argc, char **argv) -{ - const int lwt_max_cpus = 32; - int ncpus; - int totalspace; - int nevents_per_cpu; - lwt_event_t *events; - lwt_event_t *cpu_event[lwt_max_cpus + 1]; - lwt_event_t *next_event[lwt_max_cpus]; - lwt_event_t *first_event[lwt_max_cpus]; - int cpu; - lwt_event_t *e; - int rc; - int i; - double mhz; - cycles_t t0; - cycles_t tlast; - cycles_t tnow; - struct timeval tvnow; - int printed_date = 0; - int nlines = 0; - FILE *f = stdout; - - if (argc < 2 || - (strcmp(argv[1], "start") && - strcmp(argv[1], "stop"))) { - fprintf(stderr, - "usage: %s start\n" - " %s stop [fname]\n", argv[0], argv[0]); - return (-1); - } - - if (!strcmp(argv[1], "start")) { - /* disable */ - if (lwt_control(0, 0) != 0) - return (-1); - - /* clear */ - if (lwt_control(0, 1) != 0) - return (-1); - - /* enable */ - if (lwt_control(1, 0) != 0) - return (-1); - - return (0); - } - - if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0) - return (-1); - - if (ncpus > lwt_max_cpus) { - fprintf(stderr, "Too many cpus: %d (%d)\n", - ncpus, lwt_max_cpus); - return (-1); - } - - events = (lwt_event_t *)malloc(totalspace); - if (events == NULL) { - fprintf(stderr, "Can't allocate %d\n", totalspace); - return (-1); - } - - if (lwt_control(0, 0) != 0) { /* disable */ - free(events); - return (-1); - } - - if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) { - free(events); - return (-1); - } - - /* we want this time to be sampled at snapshot time */ - gettimeofday(&tvnow, NULL); - - if (argc > 
2) { - f = fopen (argv[2], "w"); - if (f == NULL) { - fprintf(stderr, "Can't open %s for writing: %s\n", argv[2], strerror (errno)); - free(events); - return (-1); - } - } - - mhz = get_cycles_per_usec(); - - /* carve events into per-cpu slices */ - nevents_per_cpu = totalspace / (ncpus * sizeof(lwt_event_t)); - for (cpu = 0; cpu <= ncpus; cpu++) - cpu_event[cpu] = &events[cpu * nevents_per_cpu]; - - /* find the earliest event on each cpu */ - for (cpu = 0; cpu < ncpus; cpu++) { - first_event[cpu] = NULL; - - for (e = cpu_event[cpu]; e < cpu_event[cpu + 1]; e++) { - - if (e->lwte_where == NULL) /* not an event */ - continue; - - if (first_event[cpu] == NULL || - first_event[cpu]->lwte_when > e->lwte_when) - first_event[cpu] = e; - } - - next_event[cpu] = first_event[cpu]; - } - - t0 = tlast = 0; - for (cpu = 0; cpu < ncpus; cpu++) { - e = first_event[cpu]; - if (e == NULL) /* no events this cpu */ - continue; - - if (e == cpu_event[cpu]) - e = cpu_event[cpu + 1] - 1; - else - e = e - 1; - - /* If there's an event immediately before the first one, this - * cpu wrapped its event buffer */ - if (e->lwte_where == NULL) - continue; - - /* We should only start outputting events from the most recent - * first event in any wrapped cpu. Events before this time on - * other cpus won't have any events from this CPU to interleave - * with. 
*/ - if (t0 < first_event[cpu]->lwte_when) - t0 = first_event[cpu]->lwte_when; - } - - for (;;) { - /* find which cpu has the next event */ - cpu = -1; - for (i = 0; i < ncpus; i++) { - - if (next_event[i] == NULL) /* this cpu exhausted */ - continue; - - if (cpu < 0 || - next_event[i]->lwte_when < next_event[cpu]->lwte_when) - cpu = i; - } - - if (cpu < 0) /* all cpus exhausted */ - break; - - if (t0 == 0) { - /* no wrapped cpus and this is he first ever event */ - t0 = next_event[cpu]->lwte_when; - } - - if (t0 <= next_event[cpu]->lwte_when) { - /* on or after the first event */ - if (!printed_date) { - cycles_t du = (tnow - t0) / mhz; - time_t then = tvnow.tv_sec - du/1000000; - - if (du % 1000000 > tvnow.tv_usec) - then--; - - fprintf(f, "%s", ctime(&then)); - printed_date = 1; - } - - rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]); - if (rc != 0) - break; - - if (++nlines % 10000 == 0 && f != stdout) { - /* show some activity... */ - printf("."); - fflush (stdout); - } - } - - tlast = next_event[cpu]->lwte_when; - - next_event[cpu]++; - if (next_event[cpu] == cpu_event[cpu + 1]) - next_event[cpu] = cpu_event[cpu]; - - if (next_event[cpu]->lwte_where == NULL || - next_event[cpu] == first_event[cpu]) - next_event[cpu] = NULL; - } - - if (f != stdout) { - printf("\n"); - fclose(f); - } - - free(events); - return (0); -} - -int jt_ptl_memhog(int argc, char **argv) -{ - static int gfp = 0; /* sticky! 
*/ - - struct libcfs_ioctl_data data; - int rc; - int count; - char *end; - - if (argc < 2) { - fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]); - return 0; - } - - count = strtol(argv[1], &end, 0); - if (count < 0 || *end != 0) { - fprintf(stderr, "Can't parse page count '%s'\n", argv[1]); - return -1; - } - - if (argc >= 3) { - rc = strtol(argv[2], &end, 0); - if (*end != 0) { - fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]); - return -1; - } - gfp = rc; - } - - LIBCFS_IOC_INIT(data); - data.ioc_count = count; - data.ioc_flags = gfp; - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_MEMHOG, &data); - - if (rc != 0) { - fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno)); - return -1; - } - - printf("memhog %d OK\n", count); - return 0; -} - -int jt_ptl_testprotocompat(int argc, char **argv) -{ - struct libcfs_ioctl_data data; - int rc; - int flags; - char *end; - - if (argc < 2) { - fprintf(stderr, "usage: %s <number>\n", argv[0]); - return 0; - } - - flags = strtol(argv[1], &end, 0); - if (flags < 0 || *end != 0) { - fprintf(stderr, "Can't parse flags '%s'\n", argv[1]); - return -1; - } - - LIBCFS_IOC_INIT(data); - data.ioc_flags = flags; - rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_TESTPROTOCOMPAT, &data); - - if (rc != 0) { - fprintf(stderr, "test proto compat %x failed: %s\n", - flags, strerror(errno)); - return -1; - } - - printf("test proto compat %x OK\n", flags); - return 0; -} - - diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c deleted file mode 100644 index c3ab2b7d04f78f0a98cc2fe00fb7d56e584d7baa..0000000000000000000000000000000000000000 --- a/lnet/utils/ptlctl.c +++ /dev/null @@ -1,77 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <lnet/api-support.h> -#include <lnet/lnetctl.h> - -#include "parser.h" - - -command_t list[] = { - {"network", jt_ptl_network, 0,"select/configure network (args: up|down|LND name)"}, - {"net", jt_ptl_network, 0,"select/configure network (args: up|down|LND name)"}, - {"list_nids", jt_ptl_list_nids, 0,"list local NIDs"}, - {"which_nid", jt_ptl_which_nid, 0,"select the closest NID"}, - {"print_interfaces", jt_ptl_print_interfaces, 0, "print interface entries (no args)"}, - {"add_interface", jt_ptl_add_interface, 0, "add interface entry (args: ip [netmask])"}, - {"del_interface", jt_ptl_del_interface, 0, "delete interface entries (args: [ip])"}, - {"print_peers", jt_ptl_print_peers, 0, "print peer entries (no args)"}, - {"add_peer", jt_ptl_add_peer, 0, "add peer entry (args: nid host port)"}, - {"del_peer", jt_ptl_del_peer, 0, "delete peer entry (args: [nid] [host])"}, - {"print_conns", jt_ptl_print_connections, 0, "print connections (no args)"}, - {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"}, - {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"}, - {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"}, - 
{"testping", jt_ptl_ping_test, 0, "do a ping test (args: nid [count] [size] [timeout])"}, - {"ping", jt_ptl_ping, 0, "ping (args: nid [timeout] [pid])"}, - {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"}, - {"add_route", jt_ptl_add_route, 0, - "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"}, - {"del_route", jt_ptl_del_route, 0, - "delete all routes via a gateway from the routing table (args: gatewayNID"}, - {"set_route", jt_ptl_notify_router, 0, - "enable/disable a route in the routing table (args: gatewayNID up/down [time]"}, - {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"}, - {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"}, - {"testprotocompat", jt_ptl_testprotocompat, 0, "usage: testprotocompat count"}, - {"help", Parser_help, 0, "help"}, - {"exit", Parser_quit, 0, "quit"}, - {"quit", Parser_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - -int main(int argc, char **argv) -{ - if (ptl_initialize(argc, argv) < 0) - exit(1); - - Parser_init("ptlctl > ", list); - if (argc > 1) - return Parser_execarg(argc - 1, &argv[1], list); - - Parser_commands(); - - return 0; -} diff --git a/lnet/utils/routerstat.c b/lnet/utils/routerstat.c deleted file mode 100644 index 0b4e5135539fabf0fdad7e63bb49d2a7bf73f6b8..0000000000000000000000000000000000000000 --- a/lnet/utils/routerstat.c +++ /dev/null @@ -1,158 +0,0 @@ -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/time.h> - -double -timenow () -{ - struct timeval tv; - - gettimeofday (&tv, NULL); - return (tv.tv_sec + tv.tv_usec / 1000000.0); -} - -typedef struct { - unsigned long msgs_alloc; - unsigned long msgs_max; - unsigned long errors; - unsigned long send_count; - unsigned long recv_count; - unsigned long route_count; - unsigned 
long drop_count; - unsigned long long send_length; - unsigned long long recv_length; - unsigned long long route_length; - unsigned long long drop_length; -} counters_t; - -unsigned long long subull(unsigned long long a, unsigned long long b) -{ - if (a < b) - return -1ULL - b + a + 1; - - return a - b; -} - -unsigned long long subul(unsigned long a, unsigned long b) -{ - if (a < b) - return -1UL - b + a + 1; - - return a - b; -} - -double rul(unsigned long a, double secs) -{ - return (double)a/secs; -} - -double rull(unsigned long long a, double secs) -{ - return (double)a/secs; -} - -void -do_stat (int fd) -{ - static char buffer[1024]; - static double last = 0.0; - static counters_t old_counter; - double now; - double t; - counters_t new_counter; - counters_t counter; - int n; - - lseek (fd, 0, SEEK_SET); - now = timenow(); - n = read (fd, buffer, sizeof (buffer)); - if (n < 0) - { - fprintf (stderr, "Can't read statfile\n"); - exit (1); - } - buffer[n] = 0; - - n = sscanf (buffer, "%lu %lu %lu %lu %lu %lu %lu %Lu %Lu %Lu %Lu", - &new_counter.msgs_alloc, &new_counter.msgs_max, - &new_counter.errors, - &new_counter.send_count, &new_counter.recv_count, - &new_counter.route_count, &new_counter.drop_count, - &new_counter.send_length, &new_counter.recv_length, - &new_counter.route_length, &new_counter.drop_length); - if (n < 11) - { - fprintf (stderr, "Can't parse statfile\n"); - exit (1); - } - - if (last == 0.0) { - printf ("M %lu(%lu) E %lu S %lu/%llu R %lu/%llu F %lu/%llu D %lu/%llu\n", - new_counter.msgs_alloc, new_counter.msgs_max, - new_counter.errors, - new_counter.send_count, new_counter.send_length, - new_counter.recv_count, new_counter.recv_length, - new_counter.route_count, new_counter.route_length, - new_counter.drop_count, new_counter.drop_length); - } else { - t = now - last; - - counter.msgs_alloc = new_counter.msgs_alloc; - counter.msgs_max = new_counter.msgs_max; - - counter.errors = subul(new_counter.errors, old_counter.errors); - counter.send_count 
= subul(new_counter.send_count, old_counter.send_count); - counter.recv_count = subul(new_counter.recv_count, old_counter.recv_count); - counter.route_count = subul(new_counter.route_count, old_counter.route_count); - counter.drop_count = subul(new_counter.drop_count, old_counter.drop_count); - counter.send_length = subull(new_counter.send_length, old_counter.send_length); - counter.recv_length = subull(new_counter.recv_length, old_counter.recv_length); - counter.route_length = subull(new_counter.route_length, old_counter.route_length); - counter.drop_length = subull(new_counter.drop_length, old_counter.drop_length); - - printf ("M %3lu(%3lu) E %0.0f S %7.2f/%6.0f R %7.2f/%6.0f F %7.2f/%6.0f D %4.2f/%0.0f\n", - counter.msgs_alloc, counter.msgs_max, - rul(counter.errors,t), - rull(counter.send_length,t*1024.0*1024.0), rul(counter.send_count, t), - rull(counter.recv_length,t*1024.0*1024.0), rul(counter.recv_count, t), - rull(counter.route_length,t*1024.0*1024.0), rul(counter.route_count, t), - rull(counter.drop_length,t*1024.0*1024.0), rul(counter.drop_count, t)); - } - - old_counter = new_counter; - fflush (stdout); - - lseek (fd, 0, SEEK_SET); - last = timenow(); -} - -int main (int argc, char **argv) -{ - int interval = 0; - int fd; - - if (argc > 1) - interval = atoi (argv[1]); - - fd = open ("/proc/sys/lnet/stats", O_RDONLY); - if (fd < 0) - { - fprintf (stderr, "Can't open stat: %s\n", strerror (errno)); - return (1); - } - - do_stat (fd); - if (interval == 0) - return (0); - - for (;;) - { - sleep (interval); - do_stat (fd); - } -} diff --git a/lnet/utils/wirecheck.c b/lnet/utils/wirecheck.c deleted file mode 100644 index 9590b8b9ab196e4caca2b9ebb278c34108c050eb..0000000000000000000000000000000000000000 --- a/lnet/utils/wirecheck.c +++ /dev/null @@ -1,213 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include <stdio.h> -#include <sys/types.h> -#include <sys/wait.h> -#include 
<lnet/lib-lnet.h> - -#include <string.h> - -#ifndef HAVE_STRNLEN -#define strnlen(s, i) strlen(s) -#endif - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" CLASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE((int)offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -check_lnet_handle_wire (void) -{ - CHECK_STRUCT (lnet_handle_wire_t); - CHECK_MEMBER (lnet_handle_wire_t, wh_interface_cookie); - CHECK_MEMBER (lnet_handle_wire_t, wh_object_cookie); -} - -void -check_lnet_magicversion (void) -{ - CHECK_STRUCT (lnet_magicversion_t); - CHECK_MEMBER (lnet_magicversion_t, magic); - CHECK_MEMBER (lnet_magicversion_t, version_major); - CHECK_MEMBER (lnet_magicversion_t, version_minor); -} - -void -check_lnet_hdr (void) -{ - CHECK_STRUCT (lnet_hdr_t); - CHECK_MEMBER (lnet_hdr_t, dest_nid); - CHECK_MEMBER (lnet_hdr_t, src_nid); - CHECK_MEMBER (lnet_hdr_t, dest_pid); - CHECK_MEMBER (lnet_hdr_t, src_pid); - CHECK_MEMBER (lnet_hdr_t, type); - CHECK_MEMBER (lnet_hdr_t, payload_length); - CHECK_MEMBER (lnet_hdr_t, msg); - - BLANK_LINE (); - COMMENT ("Ack"); - CHECK_MEMBER (lnet_hdr_t, msg.ack.dst_wmd); - CHECK_MEMBER (lnet_hdr_t, msg.ack.match_bits); - CHECK_MEMBER (lnet_hdr_t, msg.ack.mlength); - - BLANK_LINE (); - COMMENT ("Put"); - CHECK_MEMBER (lnet_hdr_t, msg.put.ack_wmd); - CHECK_MEMBER (lnet_hdr_t, msg.put.match_bits); - 
CHECK_MEMBER (lnet_hdr_t, msg.put.hdr_data); - CHECK_MEMBER (lnet_hdr_t, msg.put.ptl_index); - CHECK_MEMBER (lnet_hdr_t, msg.put.offset); - - BLANK_LINE (); - COMMENT ("Get"); - CHECK_MEMBER (lnet_hdr_t, msg.get.return_wmd); - CHECK_MEMBER (lnet_hdr_t, msg.get.match_bits); - CHECK_MEMBER (lnet_hdr_t, msg.get.ptl_index); - CHECK_MEMBER (lnet_hdr_t, msg.get.src_offset); - CHECK_MEMBER (lnet_hdr_t, msg.get.sink_length); - - BLANK_LINE (); - COMMENT ("Reply"); - CHECK_MEMBER (lnet_hdr_t, msg.reply.dst_wmd); - - BLANK_LINE (); - COMMENT ("Hello"); - CHECK_MEMBER (lnet_hdr_t, msg.hello.incarnation); - CHECK_MEMBER (lnet_hdr_t, msg.hello.type); -} - -void -system_string (char *cmdline, char *str, int len) -{ - int fds[2]; - int rc; - pid_t pid; - - rc = pipe (fds); - if (rc != 0) - abort (); - - pid = fork (); - if (pid == 0) { - /* child */ - int fd = fileno(stdout); - - rc = dup2(fds[1], fd); - if (rc != fd) - abort(); - - exit(system(cmdline)); - /* notreached */ - } else if ((int)pid < 0) { - abort(); - } else { - FILE *f = fdopen (fds[0], "r"); - - if (f == NULL) - abort(); - - close(fds[1]); - - if (fgets(str, len, f) == NULL) - abort(); - - if (waitpid(pid, &rc, 0) != pid) - abort(); - - if (!WIFEXITED(rc) || - WEXITSTATUS(rc) != 0) - abort(); - - if (strnlen(str, len) == len) - str[len - 1] = 0; - - if (str[strlen(str) - 1] == '\n') - str[strlen(str) - 1] = 0; - - fclose(f); - } -} - -int -main (int argc, char **argv) -{ - char unameinfo[256]; - char gccinfo[256]; - - system_string("uname -a", unameinfo, sizeof(unameinfo)); - system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); - - printf ("void lnet_assert_wire_constants (void)\n" - "{\n" - " /* Wire protocol assertions generated by 'wirecheck'\n" - " * running on %s\n" - " * with %s */\n" - "\n", unameinfo, gccinfo); - - BLANK_LINE (); - - COMMENT ("Constants..."); - - CHECK_DEFINE (LNET_PROTO_OPENIB_MAGIC); - CHECK_DEFINE (LNET_PROTO_RA_MAGIC); - - CHECK_DEFINE (LNET_PROTO_TCP_MAGIC); - 
CHECK_DEFINE (LNET_PROTO_TCP_VERSION_MAJOR); - CHECK_DEFINE (LNET_PROTO_TCP_VERSION_MINOR); - - CHECK_VALUE (LNET_MSG_ACK); - CHECK_VALUE (LNET_MSG_PUT); - CHECK_VALUE (LNET_MSG_GET); - CHECK_VALUE (LNET_MSG_REPLY); - CHECK_VALUE (LNET_MSG_HELLO); - - check_lnet_handle_wire (); - check_lnet_magicversion (); - check_lnet_hdr (); - - printf ("}\n\n"); - - return (0); -} diff --git a/lustre-iokit/AUTHORS b/lustre-iokit/AUTHORS deleted file mode 100644 index f0f587a37178336cd85873229e31b6ad81284cf5..0000000000000000000000000000000000000000 --- a/lustre-iokit/AUTHORS +++ /dev/null @@ -1,5 +0,0 @@ -Phil Schwan -Eric Barton <eeb@clusterfs.com> -Jody McIntyre <scjody@clusterfs.com> -Michael MacDonald <mjmac@clusterfs.com> -Cliff White <cliffw@clusterfs.com> diff --git a/lustre-iokit/ChangeLog b/lustre-iokit/ChangeLog deleted file mode 100644 index 86f65969644644dc1abc8dea806abc3d133e12b9..0000000000000000000000000000000000000000 --- a/lustre-iokit/ChangeLog +++ /dev/null @@ -1 +0,0 @@ -2006-10-31 - Borrowing mjmac's most excellent work and setting up autogen/RPM diff --git a/lustre-iokit/Makefile.am b/lustre-iokit/Makefile.am deleted file mode 100644 index 919458e703ebb69b8e80c68f7dd2f5fcf390823e..0000000000000000000000000000000000000000 --- a/lustre-iokit/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -SUBDIRS = obdfilter-survey sgpdd-survey ost-survey ior-survey - -EXTRA_DIST = lustre-iokit.spec - -rpm: dist - rpmbuild -ta $(PACKAGE)-$(VERSION).tar.gz diff --git a/lustre-iokit/NEWS b/lustre-iokit/NEWS deleted file mode 100644 index 6874d57ead8be478b5c8bfd9ea8819b01357453c..0000000000000000000000000000000000000000 --- a/lustre-iokit/NEWS +++ /dev/null @@ -1 +0,0 @@ -2006-10-31 - Moved to autoconf diff --git a/lustre-iokit/README b/lustre-iokit/README deleted file mode 100644 index afc6a5a91a6912bceffeb52ed26514256fabf13e..0000000000000000000000000000000000000000 --- a/lustre-iokit/README +++ /dev/null @@ -1,24 +0,0 @@ -This bundle includes four tools: -In order of preference: 
- -sgpdd-survey: - a test of the 'bare metal' performance, bypassing -as much of the kernel as we can. Does not require Lustre, does -require the sgp_dd package. WILL ERASE ALL DATA ON DEVICE. - -obdfilter-survey: -Shell script - tests performance of isolated OSTS, network -via echo clients, end-to-end test. - -obdsurvey: - a test of Lustre performance with three modes: -Maintained by Scali, included here as an extra. -Requires Python > 2.2 -Requires Lustre - -- local disk test - requires one OST -- network performance test - requires two Lustre machines -- network + disk test - requires Lustre filesystem and client - -ior-survey: -A script to run the IOR benchmark. Version 2.8.6 of IOR is included - - diff --git a/lustre-iokit/autogen.sh b/lustre-iokit/autogen.sh deleted file mode 100644 index 37f4a1551dcb5f5655d43489c93b824ff54d7489..0000000000000000000000000000000000000000 --- a/lustre-iokit/autogen.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh - -fatal() { - local msg="$1" - - echo "FATAL: $msg" - exit 1 -} - -run_cmd() { - local cmd="$1" - - echo "Running $cmd..." - $cmd || fatal "$cmd failed!" -} - -run_cmd aclocal -run_cmd "automake -a -c" -run_cmd autoconf - -echo "Finished. Ready for ./configure ..." 
diff --git a/lustre-iokit/configure.ac b/lustre-iokit/configure.ac deleted file mode 100644 index 9592dadddd4baaf23ca10ab94f98c526d5a135a7..0000000000000000000000000000000000000000 --- a/lustre-iokit/configure.ac +++ /dev/null @@ -1,15 +0,0 @@ -AC_INIT -AM_INIT_AUTOMAKE(lustre-iokit,1.1) -AC_PATH_PROGS(BASH, bash) -AC_PATH_PROGS(PERL, perl) -RELEASE="`date +%Y%m%d%H%M`" -AC_SUBST(RELEASE) - -AC_OUTPUT( -lustre-iokit.spec -Makefile -sgpdd-survey/Makefile -obdfilter-survey/Makefile -ost-survey/Makefile -ior-survey/Makefile -) diff --git a/lustre-iokit/ior-survey/Makefile.am b/lustre-iokit/ior-survey/Makefile.am deleted file mode 100644 index 537162b2171322acc20e04c8376a069cbc64466d..0000000000000000000000000000000000000000 --- a/lustre-iokit/ior-survey/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -bin_SCRIPTS = ior-survey parse-ior -CLEANFILE = $(bin_SCRIPTS) -EXTRA_DIST = README.ior-survey ior-survey parse-ior diff --git a/lustre-iokit/ior-survey/README.ior-survey b/lustre-iokit/ior-survey/README.ior-survey deleted file mode 100644 index 8d734da7f29f0fbb22f2e177b8b75e26d9526579..0000000000000000000000000000000000000000 --- a/lustre-iokit/ior-survey/README.ior-survey +++ /dev/null @@ -1,190 +0,0 @@ -Introduction : - - The ior_survey script can be used to test the performance of the lustre -file systems. It uses IOR (Interleaved Or Random), a script used for testing -performance of parallel file systems using various interfaces and access -patterns. IOR uses MPI for process synchronization. - -General Description: - - ior_mpiio is a parallel file system test developed by the SIOP (Scalable -I/O Project) at LLNL. This parallel program performs parallel writes and -reads to/from a file using MPI-IO and reporting the throughput rates. - - MPI is used for process synchronization. Under the control of compile-time -defined constants (and, to a lesser extent, environment variables), I/O is done -via MPI-IO. 
The data are written and read using independent parallel transfers -of equal-sized blocks of contiguous bytes that cover the file with no gaps and -that do not overlap each other. The test consists of creating a new file, -writing it with data, then reading the data back. - - The data written are C integers. If the program runs successfully to -completion, it returns 0. If a problem is detected with any I/O routine, the -program exits with a value of IO_ERR. - - If a non-I/O problem is detected, the program exits with a value of -INTERNAL_ERR (this can be caused by a bug in the test program, or a problem in -MPI, or by inconsistencies in the environment variable settings). - -Requirements : - To run the ior_survey script following items are required. - -1: IOR - - The IOR test should be obtained at - ftp://ftp.llnl.gov/pub/siop/ior/ - -2: pdsh - The tarball can be obtained from - http://sourceforge.net/project/showfiles.php?group_id=33530&package_id=183641 - -3: pdsh-rcmd-ssh module - The rpm for this could be found at - http://sourceforge.net/project/showfiles.php?group_id=33530&package_id=183641 - -4: lam/mpi - The tarball can be obtained from - http://www.lam-mpi.org/7.1/download.php - -5: You need to be a non-root user to execute the script and should have the - super-user privileges. - -6: The user should have login on all the nodes without password on which the - test is going to be run. - - - -To make an entry into the sudoers file : - -1: Become super user (root) - -2: type visudo - -3: make an entry as - username ALL=(ALL) NOPASSWD: ALL //(username is the name of the user) - - -Building IOR : - - Type 'gmake mpiio' from the IOR/ directory. In - IOR/src/C, the file Makefile.config currently has settings for AIX, Linux, - OSF1 (TRU64), and IRIX64 to model on. Note that MPI must be present for - building/running IOR, and that MPI I/O must be available for MPI I/O, HDF5, - and Parallel netCDF builds. 
As well, HDF5 and Parallel netCDF libraries are - necessary for those builds. All IOR builds include the POSIX interface. - - Copy the IOR binary file in IOR/src/C/ to /usr/local/sbin/ using - - sudo cp IOR/src/C/IOR /usr/local/sbin/ - - - -Installing pdsh and pdsh-rcmd-ssh module : - -1: Download the pdsh tarball - -2: untar it using tar -xzvf (if tar.gz) or tar -xjvf(if tar.bz2) - -3: go to the pdsh directory and type ./bootstrap - -4: configure it using the following command - - ./configure --with-ssh - -5: Build it using "make" - -6: Install it using "sudo make install" - -7: Download the pdsh-rcmd-ssh rpm - -8: Install the rpm using "rpm -ivh pdsh-rcmd-ssh*" - - -Installing lam/mpi : - -1: Download the lam tarball - -2: untar it using tar -xzvf (if tar.gz) or tar -xjvf(if tar.bz2) - -3: go to the lam directory and type ./configure - -4: Build it using "make" - -5: Install it using "sudo make install" - - The lam, IOR, pdsh should be installed on all the nodes on which the - test is going to be run. - -Note: Please make sure that you have installed the same version of lam on all -the nodes on which the test is going to be run. - - - -Running the ior_survey script : - -1: Lustre should be mounted at /mnt/lustre. Do - "touch /mnt/lustre/ior_survey_testfile" - -2: Make a hostfile in which the ip addresses of all the nodes are present on - the node from where the script is going to be executed. - -3: run the lam using "lamboot -v -d hostfile". This will start lamd on all the - nodes. - -4: run the ior_survey script using "./ior_survey" - -Note: - The node names of the clients should be like rhea1, rhea2, rhea3, so on. - The name of the cluster (1st part of the node name) should be set in the - ior_survey script in the cluster name field. - e.g. cluster=rhea //name of the cluster - - The client node numbers should be set as last part of the node name i.e. - numeral part. - e.g. client=(1) //to run test on one node only node1. 
- client=(1-2) //to run test on two nodes node1, node2. - - Please note that the hostfile should contain the ip addresses of only - those nodes on which the lustre filesystem is mounted i.e. clients are - mounted. - - The details of the test can be found on the node from where the - test was run as /tmp/ior_survey_run_date@start_time_nodename.detail - - The output of the IOR looks like - -host1: access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) iter -host1: ------ --------- ---------- --------- -------- -------- -------- ---- -host1: write 1.58 2097152 1024.00 0.000873 1299.37 0.000132 0 -host1: -host1: Max Write: 1.58 MiB/sec (1.65 MB/sec) - - where, - host1 : node on which the test is run - access: the test which is run (write, rewrite, read, reread) - bw : band width - block : total size to be written - xfer : block size to transfer here 1MB - open : time taken for open - close : time taken for close - wr/rd : time taken for read/write - iteration : iteration no. - Max write : Max_write speed obtained - -Note : MB is defined as 1,000,000 bytes and MiB is 1,048,576 bytes. - - The summary of the test can be found on the node from where the - test was run as /tmp/ior_survey_run_date@start_time_nodename.summary - It contains the tests run and the status of those tests. - - -Instructions for graphing IOR results - - The plot-ior.pl script will plot the results from the .detail file - generated by ior-survery. It will create a data file for writes as - /tmp/ior_survey_run_date@start_time_nodename.detail.dat1 and for reads - as /tmp/ior_survey_run_date@start_time_nodename.detail.dat2 and gnuplot - file as /tmp/ior_survey_run_date@start_time_nodename.detail.scr. 
- - $ perl parse-ior.pl /tmp/ior_survey_run_date@start_time_nodename.detail - diff --git a/lustre-iokit/ior-survey/ior-survey b/lustre-iokit/ior-survey/ior-survey deleted file mode 100644 index 235bdc87fb3ef75dd5dbf06a5cc4c7d89a2af5fc..0000000000000000000000000000000000000000 --- a/lustre-iokit/ior-survey/ior-survey +++ /dev/null @@ -1,290 +0,0 @@ -#!/bin/bash - -# cluster name (all node names are this followed by the node number) -cluster=mdev - -# client node numbers (individual numbers or inclusive ranges) -clients=(7-8) - -# numbers of clients to survey -clients_lo=1 -clients_hi=2 -clients_iterator="+=1" - -# numbers of tasks per client to survey -tasks_per_client_lo=1 -tasks_per_client_hi=8 -tasks_per_client_iterator="*=2" - -# record sizes to survey -rsize_lo=1M -rsize_hi=1M -rsize_iterator="*=2" - -## which tests to run (first must be write) -# clear_cache) not really a test; just uncache everything -# *write*) write -# *) read -#tests=(write rewrite read reread rewrite_again) -tests=(write rewrite clear_cache read reread) - -# total # bytes written/read by any client node -min_per_client_size=4G -min_total_size=8G - -# should each task do I/O to its own file? -file_per_task=1 - -# the binaries -IOR=/usr/local/sbin/IOR -llmount=llmount -pdsh=pdsh - -# the result file prefix (date/time + hostname makes unique) -rslt=/tmp/ior_survey_`date +%F@%R`_`uname -n` - -# where lustre is mounted on the clients -lustre=/mnt/lustre - -# basename of the test file(s) -testfile=${lustre}/ior_survey_testfile - -# pdsh args required to instantiate all instances of IOR in parallel -# the chosen module must support '-n <procs-per-node>' -# -R<module>, -f<fanout> etc -pdsh_mpiargs="-Rmqsh" - -#don't spin for MPI completions -export LIBELAN_WAITTYPE=0 - -################################################################################ -# dont change stuff below here unless you know what you're doing... 
- -count_range() { - echo $1 | awk '{ nvals=split($1, vals, "-");\ - if (nvals == 1) print 1;\ - else if (nvals == 2) printf "%d\n", vals[2] - vals[1] + 1;}' -} - -base_range() { - echo $1 | awk '{ split($1, vals, "-"); print vals[1]; }' -} - -idx2nodenum() { - local n=$1; shift - while ((1)); do - local range=$1; shift - if [ -z "$range" ]; then - return - fi - chunk=`count_range $range` - if ((chunk > n)); then - base=`base_range $range` - echo $((base + n)) - return - fi - n=$((n-chunk)) - done -} - -n2noderange() { - local n=$1; shift - sep="" - nodes="[" - while ((n > 0)); do - local range=$1; shift - if [ -z "$range" ]; then - return - fi - local base=`base_range $range` - local chunk=`count_range $range` - if ((chunk > n)); then chunk=n; fi - local nodes="${nodes}${sep}${base}"; sep="," - if ((chunk > 1)); then nodes="${nodes}-$((base+chunk-1))"; fi - n=$((n-chunk)) - done - echo "${nodes}]" -} - -countnodes() { - local radix=16384 - local n=0 - while ((radix > 0)); do - local nodes=`n2noderange $((n+radix)) $@` - if [ -n "$nodes" ]; then - n=$((n+radix)) - fi - radix=$((radix/2)) - done - echo $n -} - -parse_number() { - local str=$1 - case $str in - *G|*g) n=`echo $str | sed 's/[gG]//'`; echo $((n*1024*1024*1024));; - *M|*m) n=`echo $str | sed 's/[Mm]//'`; echo $((n*1024*1024));; - *K|*k) n=`echo $str | sed 's/[Kk]//'`; echo $((n*1024));; - *) echo $1;; - esac -} - -pp_number() { - local n=$1 - local G=$((1024*1024*1024)) - local M=$((1024*1024)) - local K=$((1024)) - if ((n%G == 0 && n >= G)); then - echo "$((n/G))G" - elif ((n%M == 0 && n >= M)); then - echo "$((n/M))M" - elif ((n%K == 0 && n >= K)); then - echo "$((n/K))K" - else - echo $n - fi -} - -if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then - echo "First test must be 'write'" 1>&2 - exit 1 -fi - -rsltf="${rslt}.summary" -workf="${rslt}.detail" -echo -n > $rsltf -echo -n > $workf - -print_summary () { - if [ "$1" = "-n" ]; then - minusn=$1; shift - else - minusn="" - fi - echo $minusn 
"$*" >> $rsltf - echo $minusn "$*" -} - -# convert params to actual numbers -min_per_client_size=`parse_number $min_per_client_size` -min_total_size=`parse_number $min_total_size` - -rsize_lo=`parse_number $rsize_lo` -rsize_hi=`parse_number $rsize_hi` - -# check on actual numbers of client nodes -nclients=`countnodes ${clients[@]}` -if ((clients_hi > nclients)); then clients_hi=$nclients; fi - -for ((rsize=rsize_lo; rsize<=rsize_hi; rsize$rsize_iterator)); do - pp_rsize=`pp_number $rsize` - - for ((nclnt=clients_lo; nclnt<=clients_hi; nclnt$clients_iterator)); do - test_clients="${cluster}`n2noderange $nclnt ${clients[@]}`" - - per_client_size=$((min_total_size/nclnt)) - if ((per_client_size < min_per_client_size)); then - per_client_size=$min_per_client_size - fi - - for ((ntask=tasks_per_client_lo; ntask <= tasks_per_client_hi; ntask$tasks_per_client_iterator)); do - per_task_size=$((per_client_size/ntask)) - if ((per_task_size%rsize != 0)); then - per_task_size=$(((per_task_size/rsize + 1)*rsize)) - fi - total_size=`pp_number $((per_task_size*nclnt*ntask))` - - hdrstr=`printf "Total: %5sB rsize: %4sB clients: %4d tasks: %3d: " \ - $total_size $pp_rsize $nclnt $ntask` - print_summary -n "$hdrstr" - - for ((test_idx=0; test_idx < ${#tests[@]}; test_idx++)); do - test=${tests[$test_idx]} - - print_summary -n "$test " - echo "===========> ${hdrstr} on $test_clients doing $test" >> $workf - tmpf=${workf}_tmp - echo -n > $tmpf - - if [ "$test" = "clear_cache" ]; then - clear_cache='for LRU in /proc/fs/lustre/ldlm/namespaces/*/lru_size; do; echo clear > $LRU; done' - echo "=> $clear_cache" >> $tmpf - $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \ - "$clear_cache" - status=$? 
- echo "Completion Status: $status" >> $tmpf - - if ((status)); then - result="ERROR" - else - result="OK" - fi - else - # check lustre is mounted everywhere it's needed - cmd="(mount -t lustre; mount -t lustre_lite) | grep $lustre" - echo "=> Mount Check: $cmd" >> $tmpf - $pdsh -S -b -w "$test_clients" >> $tmpf 2>&1 \ - "$cmd" - status=$? - echo "Completion Status: $status" >> $tmpf - if ((status)); then - cat $tmpf >> $workf - rm $tmpf - print_summary "Lustre NOT mounted on $lustre somewhere" - exit 1 - fi - - cmdline=( - $IOR # the command - -o${testfile} # test file prefix - -b${per_task_size} # bytes per task - -t${rsize} # record size - -e # fsync before close - -q # quit on error - ) - - idx=${#cmdline[@]} - - # keep the test file(s) unless this is the last test - ((test_idx < ${#tests[@]}-1)) && cmdline[$((idx++))]="-k" - - # use the existing test file(s) unless this is the first test - ((test_idx > 0)) && cmdline[$((idx++))]="-E" - - # file-per-task - ((file_per_task)) && cmdline[$((idx++))]="-F" - - case "$test" in - *write*) cmdline[$((idx++))]="-w" - awkstr="Max Write";; - *) cmdline[$((idx++))]="-r" - awkstr="Max Read";; - esac - - echo "=> ${cmdline[@]}" >> $tmpf - - $pdsh -S -b $pdsh_mpiargs -w "$test_clients" -n $ntask >> $tmpf 2>&1 \ - "${cmdline[@]}" - status=$? 
- - echo "Completion Status: $status" >> $tmpf - - if ((status)); then - result="ERROR" - else - result=`awk < $tmpf "/$awkstr/ {print $ 3; found=1; exit}\ - END {if (!found) print \"ERROR\"}"` - fi - fi - - cat $tmpf >> $workf - rm $tmpf - - str=`printf "%8s" "$result"` - print_summary -n "$str " - done - print_summary "" - done - done -done - diff --git a/lustre-iokit/ior-survey/parse-ior b/lustre-iokit/ior-survey/parse-ior deleted file mode 100644 index e751503b09f48ec5188c74b0e21dac5be4e754aa..0000000000000000000000000000000000000000 --- a/lustre-iokit/ior-survey/parse-ior +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/perl -w - -# arg 0 is filename - -sub usages_msg(){ - print "Usage: $0 <results_filename>\n"; - print " parses and plots IOR results using gnuplot, and generates a .dat file for\n"; - print " simple graphing in spreadhseets\n"; - print "e.g.> perl parse-ior.pl ior-log\n"; - exit 1; -} - -if ( !$ARGV[0] ) { - usages_msg(); -} -$file = $ARGV[0]; - -# Open log file for reading -open ( PFILE, "$file") or die "Can't open results log file"; -# Open .csv file for writting required columns from log file. -open ( DATAFILE, "> $file.dat" ) or die "Can't open csv file for writting"; -$count = 0; -while ( <PFILE> ) { - chomp; - @line = split( /\s+/ ); # splits line into tokens - if ( $line[0] ) { - # This comparison will be changed if there will be changes log file. - if( $line[0] eq "access" && $line[1] eq "bw(MiB/s)" ) { - print DATAFILE "$count $line[1] $line[4] $line[5] $line[6] br(MiB/s) ropen(s) rd(s) rclose(s)\n"; - $count = $count + 1; - } - # Two columns from output file are skiped since - # they are constant and may not be so useful while graphing results. 
- if( $line[0] eq "write" ) { - print DATAFILE "$count $line[1] $line[4] $line[5] $line[6] "; - } - if( $line[0] eq "read" ) { - print DATAFILE "$line[1] $line[4] $line[5] $line[6]\n"; - $count = $count + 1; - } - } -} -close PFILE; -close DATAFILE; - -# Open .scr file for writting instructions for gnuplot. -open ( SCRFILE, "> $file.scr" ) or die "Can't open scr file for writting"; -# Only two columns bw(MiB/s) and br(MiB/s) are considered for graphing results. -print SCRFILE "plot \"$file.dat\" using 1:2 axes x1y1 title \"bw(MiB/s)\" with line\n"; -print SCRFILE "replot \"$file.dat\" using 1:6 axes x1y1 title \"br(MiB/s)\" with line\n"; -print SCRFILE "pause -1\n"; -close SCRFILE; -# check whether gnuplot exists? -system ("which gnuplot > /dev/null") == 0 or die "gnuplot does not exists, Please install it and try again.\n"; -# invoke gnuplot to display graph. -system ("gnuplot $file.scr"); diff --git a/lustre-iokit/lustre-iokit.spec.in b/lustre-iokit/lustre-iokit.spec.in deleted file mode 100644 index df50b5fd591d0e3cb90627fe75940be1974d03c9..0000000000000000000000000000000000000000 --- a/lustre-iokit/lustre-iokit.spec.in +++ /dev/null @@ -1,69 +0,0 @@ -# lustre-iokit.spec -%define name @PACKAGE@ -%define version @VERSION@ -%define release @RELEASE@ - -Summary: The Lustre IO-Kit is a collection of benchmark tools for a cluster with the Lustre file system. -Name: %{name} -Version: %{version} -Release: %{release} -License: GPL -Group: Applications/System -Source: %{name}-%{version}.tar.gz -URL: http://clusterfs.com/ -BuildRoot: /var/tmp/%{name}-%{version}-root -Provides: %{name} = %{version} -BuildArch: noarch -Requires: python > 2.2, sg3_utils - -%description -This package includes four tools: -sgpdd-survey: -A test of the 'bare metal' performance, bypassing as much of the kernel as we can. Uses the sgp_dd utility. - -obdfilter-survey -This survey can be run in 3 modes to test disk I/O including the filesystem, -network I/O, and disk I/O via the network. 
The script does sequential I/O -with varying numbers of threads and objects (files) by using lctl::test_brw -to drive the echo_client connected to local or remote obdfilter instances, -or remote obdecho instances. - -ost-survey -This survey tests the client-to-disk performance of individual OSTs, and -ranks then for comparison. - -ior-survey: -A script to run the IOR benchmark. The latest version can be downloaded from http://www.llnl.gov/asci/purple/benchmarks/limited/ior/ - -%prep -%setup -qn %{name}-%{version} - -%build -rm -fr $RPM_BUILD_ROOT -./configure --prefix=/usr -make - -%install -make install DESTDIR=$RPM_BUILD_ROOT - -%files -/usr/bin/ior-survey -/usr/bin/parse-ior -/usr/bin/create-echoclient -/usr/bin/obdfilter-survey -/usr/bin/obparse -/usr/bin/plot-ost -/usr/bin/ost-survey -/usr/bin/sgpdd-survey -/usr/bin/parse-sgpdd -%doc obdfilter-survey/README.obdfilter-survey -%doc ior-survey/README.ior-survey -%doc ost-survey/README.ost-survey -%doc sgpdd-survey/README.sgpdd-survey - - -%changelog -* Mon Apr 9 2007 Cliff White -- Merged with existing, changed to .in format. -* Thu Oct 4 2006 Kalpak Shah -- Created the spec file. 
diff --git a/lustre-iokit/obdfilter-survey/Makefile.am b/lustre-iokit/obdfilter-survey/Makefile.am deleted file mode 100644 index e35f831a2002253e1afbeef4279188dedf18a352..0000000000000000000000000000000000000000 --- a/lustre-iokit/obdfilter-survey/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -bin_SCRIPTS = create-echoclient obdfilter-survey obparse -CLEANFILE = $(bin_SCRIPTS) -EXTRA_DIST = README.obdfilter-survey create-echoclient obdfilter-survey obparse diff --git a/lustre-iokit/obdfilter-survey/README.obdfilter-survey b/lustre-iokit/obdfilter-survey/README.obdfilter-survey deleted file mode 100644 index b9e82bc11ac1936ed4eb3fe3e934049c3ef1d70c..0000000000000000000000000000000000000000 --- a/lustre-iokit/obdfilter-survey/README.obdfilter-survey +++ /dev/null @@ -1,216 +0,0 @@ - -Overview --------- - -This survey script does sequential I/O with varying numbers of threads and -objects (files) by using lctl::test_brw to drive the echo_client connected -to local or remote obdfilter instances, or remote obdecho instances. - -It can be used to characterise the performance of the following lustre -components. - -1. The Stripe F/S. - - Here the script directly exercises one or more instances of obdfilter. - They may be running on 1 or more nodes, e.g. when they are all attached - to the same multi-ported disk subsystem. - - You need to tell the script all the names of the obdfilter instances. - These should be up and running already . If some are on different - nodes, you need to specify their hostnames too (e.g. node1:ost1). - - All the obdfilter instances are driven directly. The script - automatically loads the obdecho module if required and creates one - instance of echo_client for each obdfilter instance. - -2. The Network. - - Here the script drives one or more instances of obdecho via instances of - echo_client running on 1 or more nodes. - - You need to tell the script all the names of the echo_client instances. - These should already be up and running. 
If some are on different nodes, - you need to specify their hostnames too (e.g. node1:ECHO_node1). - -3. The Stripe F/S over the Network. - - Here the script drives one or more instances of obdfilter via instances - of echo_client running on 1 or more nodes. - - As with (2), you need to tell the script all the names of the - echo_client instances, which should already be up and running. - -Note that the script is _NOT_ scalable to 100s of nodes since it is only -intended to measure individual servers, not the scalability of the system -as a whole. - - -Running -------- - -The script must be customised according to the components under test and -where it should keep its working files. Customisation variables are -described clearly at the start of the script. - -To run against a local disk: ---------------------------- - -- Create a Lustre configuraton shell script and XML using your normal -methods - - You do not need to specify and MDS or LOV - - List all OSTs that you wish to test - -- On all OSS machines: - # lconf --refomat <XML file> - Remember, write tests are -destructive! This test should be run prior to startup of your actual -Lustre filesystem. If that is the case, you will not need to reformat -to restart Lustre - however, if the test is terminated before -completion, you may have to remove objects from the disk. - -- Determine the obdfilter instance names on all the clients, column 4 -of 'lctl dl'. For example: - -# pdsh -w oss[01-02] lctl dl |grep obdfilter |sort -oss01: 0 UP obdfilter oss01-sdb oss01-sdb_UUID 3 -oss01: 2 UP obdfilter oss01-sdd oss01-sdd_UUID 3 -oss02: 0 UP obdfilter oss02-sdi oss02-sdi_UUID 3 -... - -Here the obdfilter instance names are oss01-sdb, oss01-sdd, oss02-sdi. - -Since you are driving obdfilter instances directly, set the shell array -variable 'ost_names' to the names of the obdfilter instances and leave -'client_names' undefined. 
-Example: - -ost_names_str='oss01:oss01-sdb oss01:oss01-sdd oss02:oss02-sdi' \ - ./obdfilter-survey - -To run against a network: ------------------------- - -If you are driving obdfilter or obdecho instances over the network, you -must instantiate the echo_clients yourself using lmc/lconf. Set the shell -array variable 'client_names' to the names of the echo_client instances and -leave 'ost_names' undefined. - -You can optionally prefix any name in 'ost_names' or 'client_names' with -the hostname that it is running on (e.g. remote_node:ost4) if your -obdfilters or echo_clients are running on more than one node. In this -case, you need to ensure... - -(a) 'custom_remote_shell()' works on your cluster -(b) all pathnames you specify in the script are mounted on the node you - start the survey from and all the remote nodes. -(c) obdfilter-survey must be installed on the clients, in the same - location as on the master node. - -- First, bring up obdecho instances on the servers and echo_client instances -on the clients: - - run the included create-echoclient on a node that has Lustre installed. - -shell variables: - - SERVERS: Set this to a list of server hostnames, or `hostname` of - the current node will be used. This may be the wrong interface, so - check it. NOTE: create-echoclient could probably be smarter about this... - - - NETS: set this if you are using a network type other than -tcp. 
- - example: SERVERS=oss01-eth2 sh create-echoclient - -- On the servers start the obdecho server and verify that it is up: - -# lconf --node (hostname) /(path)/echo.xml -# lctl dl - 0 UP obdecho ost_oss01.local ost_oss01.local_UUID 3 - 1 UP ost OSS OSS_UUID 3 - -- On the clients start the other side of the echo connection: - -# lconf --node client /(path)/echo.xml -# lctl dl - 0 UP osc OSC_xfer01.local_ost_oss01.local_ECHO_client 6bc9b_ECHO_client_2a8a2cb3dd 5 - 1 UP echo_client ECHO_client 6bc9b_ECHO_client_2a8a2cb3dd 3 - -- verify connectivity from a client: - - lctl ping SERVER_NID - -- Run the script on the master node, specifying the client names in an -environment variable - -Example: -# client_names_str='xfer01:ECHO_client xfer02:ECHO_client -xfer03:ECHO_client xfer04:ECHO_client xfer05:ECHO_client -xfer06:ECHO_client xfer07:ECHO_client xfer08:ECHO_client -xfer09:ECHO_client xfer10:ECHO_client xfer11:ECHO_client -xfer12:ECHO_client' ./obdfilter-survey - - -- When done: cleanup echo_client/obdecho instances: - - on clients: lconf --cleanup --node client /(path)/echo.xml - - on server(s): lconf --cleanup --node (hostname) /(path)/echo.xml - -- When aborting: killall vmstat on clients: - -pdsh -w (clients) killall vmstat - -Use 'lctl device_list' to verify the obdfilter/echo_client instance names -e.g... - -When the script runs, it creates a number of working files and a pair of -result files. All files start with the prefix given by ${rslt}. - -${rslt}.summary same as stdout -${rslt}.script_* per-host test script files -${rslt}.detail_tmp* per-ost result files -${rslt}.detail collected result files for post-mortem - -The script iterates over the given numbers of threads and objects -performing all the specified tests and checking that all test processes -completed successfully. - -Note that the script does NOT clean up properly if it is aborted or if it -encounters an unrecoverable error. 
In this case, manual cleanup may be -required, possibly including killing any running instances of 'lctl' (local -or remote), removing echo_client instances created by the script and -unloading obdecho. - - -Script output -------------- - -The summary file and stdout contain lines like... - -ost 8 sz 67108864K rsz 1024 obj 8 thr 8 write 613.54 [ 64.00, 82.00] - -ost 8 is the total number of OSTs under test. -sz 67108864K is the total amount of data read or written (in KB). -rsz 1024 is the record size (size of each echo_client I/O, in KB). -obj 8 is the total number of objects over all OSTs -thr 8 is the total number of threads over all OSTs and objects -write is the test name. If more tests have been specified they - all appear on the same line. -613.54 is the aggregate bandwidth over all OSTs measured by - dividing the total number of MB by the elapsed time. -[64.00, 82.00] are the minimum and maximum instantaneous bandwidths seen on - any individual OST. - -Note that although the numbers of threads and objects are specifed per-OST -in the customisation section of the script, results are reported aggregated -over all OSTs. - - -Visualising Results -------------------- - -I've found it most useful to import the summary data (it's fixed width) -into Excel (or any graphing package) and graph bandwidth v. # threads for -varying numbers of concurrent regions. This shows how the OSS performs for -a given number of concurrently accessed objects (i.e. files) with varying -numbers of I/Os in flight. - -It is also extremely useful to record average disk I/O sizes during each -test. These numbers help find pathologies in file the file system block -allocator and the block device elevator. - -The included obparse.pl script is an example of processing the output files to -a .csv format. 
diff --git a/lustre-iokit/obdfilter-survey/create-echoclient b/lustre-iokit/obdfilter-survey/create-echoclient deleted file mode 100755 index 822f061029bc31bd5a442d8a3fb8e1e7313539f3..0000000000000000000000000000000000000000 --- a/lustre-iokit/obdfilter-survey/create-echoclient +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/bash - -# This script will create a Lustre .xml configuration consisting -# of echo client/servers for use with the obdfilter-survey test - -####################################################################### -# Customisation variables -####################################################################### - -config=$(basename $0 .sh).xml - -SERVERS=${SERVERS:-$(uname -n)} - -NETS=${NETS:-tcp} - -LMC=lmc -VERBOSE=1 -BATCH=/tmp/lmc-batch.$$ - -####################################################################### -# some helpers: actual config below -####################################################################### - -h2elan () { - echo $1 | sed 's/[^0-9]*//g' -} - -_LMC="${LMC} -m $config" - -_lmc () { - if [ $VERBOSE ]; then echo "$@"; fi - if [ -n "$BATCH" ]; then - echo "$@" >> $BATCH - else - $_LMC "$@" - fi -} - -config_end () { - [ -n "$BATCH" ] && $_LMC --batch $BATCH - cleanup -} - -cleanup () { - [ -n "$BATCH" ] && rm -f $BATCH -} - -ABORT_ON="ERR QUIT INT HUP" - -abort () { - trap - EXIT $ABORT_ON - echo "Error/Interrupt creating $config" - cleanup - exit 1 -} - -trap config_end EXIT -trap abort $ABORT_ON - -[ -f $config ] && rm $config - -#################################################################### -# the actual config -#################################################################### - -# client net -_lmc --node client --add net --nettype lnet --nid '*' - -for srv in $SERVERS; do - for net in $NETS; do - case $net in - elan*) nid=`h2elan $srv`;; - gm*) nid=`gmnalnid -n $srv`;; - *) nid=$srv;; - esac - _lmc --node $srv --add net --nettype lnet --nid ${nid}@${net} - done - - _lmc --node $srv --add ost --ost ost_$srv 
--osdtype=obdecho - - _lmc --node client --add echo_client --ost ost_$srv -done diff --git a/lustre-iokit/obdfilter-survey/obdfilter-survey b/lustre-iokit/obdfilter-survey/obdfilter-survey deleted file mode 100755 index 90c6b0b218b3246ee43a13c9da08ea0ff15ef032..0000000000000000000000000000000000000000 --- a/lustre-iokit/obdfilter-survey/obdfilter-survey +++ /dev/null @@ -1,591 +0,0 @@ -#!/bin/bash - -###################################################################### -# customize per survey - -# specify obd instances to exercise -# these can be either... -# obdfilter instances (set 'ost_names') -# ...or... -# echo_client instances (set 'client_names') -# ... use 'host:name' for obd instances on other nodes. - -# allow these to be passed in via string... -ost_names_str=${ost_names_str:-""} -if [ -n "$ost_names_str" ]; then - declare -a ost_names - count=0 - for name in $ost_names_str; do - ost_names[$count]=$name - count=$((count+1)) - done -else - ost_names=(ost{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}) -fi - -#client_names=(ns8:ECHO_ns8 ns9:ECHO_ns9) -client_names_str=${client_names_str:-""} -if [ -n "$client_names_str" ]; then - # make sure we unset ost_names so that our client_names get noticed... 
- unset ost_names - declare -a client_names - count=0 - for name in $client_names_str; do - client_names[$count]=$name - count=$((count+1)) - done -fi - -# result file prefix (date/time + hostname makes unique) -# NB ensure path to it exists -rslt=${rslt:-"/tmp/obdfilter_survey_`date +%F@%R`_`uname -n`"} - -# lustre root (if running with own source tree) -# lustre_root=${lustre_root:-"/my/directory/lustre"} - -# what tests to run (first must be write) -tests_str=${tests_str:-""} -if [ -n "$tests_str" ]; then - declare -a tests - count=0 - for name in $tests_str; do - tests[$count]=$name - count=$((count+1)) - done -else - #tests=(write rewrite read reread rewrite_again) - tests=(write rewrite read) -fi - -# Set this true to check file contents -verify=0 - -# total size (MBytes) per obd instance -# large enough to avoid cache effects -# and to make test startup/shutdown overhead insignificant -size=${size:-16384} - -# record size (KBytes) -rszlo=${rszlo:-1024} -rszhi=${rszhi:-1024} - -# number of objects per OST -nobjlo=${nobjlo:-1} -nobjhi=${nobjhi:-512} - -# threads per OST (1024 max) -thrlo=${thrlo:-1} -thrhi=${thrhi:-64} - -# restart from here iff all are defined -restart_rsz= -restart_thr=1 -restart_nobj=1 - -# machine's page size (K) -if [ -z "$PAGE_SIZE" ]; then - if which python >/dev/null; then - PAGE_SIZE=`echo 'import resource; print resource.getpagesize()/1024;' |python` - fi -fi -PAGE_SIZE=${PAGE_SIZE:-4} - -# max buffer_mem (total_threads * buffer size) -# (to avoid lctl ENOMEM problems) -max_buffer_mem=$((1024*1024)) - -# how to run commands on other nodes -# You need to make this work on your cluster if you have specified -# non-local obd instances above -custom_remote_shell () { - host=$1 - shift - cmds="$*" - here=`pwd` - # Hop on to the remote node, chdir to 'here' and run the given - # commands. One of the following will probably work. 
- ssh $host "cd $here; $cmds" - #rsh $host "cd $here; $cmds" - # we have to remove the leading `uname -n`: from pdsh output lines - #pdsh -w $host "cd $here; $cmds" | sed 's/^[^:]*://' -} - -##################################################################### -# leave the rest of this alone unless you know what you're doing... - -# binaries -lsmod="/sbin/lsmod" -modprobe="/sbin/modprobe" -insmod="/sbin/insmod" -rmmod="/sbin/rmmod" - -# lctl::test_brw bandwidth snapshot interval (seconds) -snap=1 - - -if [ ${#tests[@]} -eq 0 -o "${tests[0]}" != "write" ]; then - echo "tests: ${tests[@]}" - echo "First test must be 'write'" 1>&2 - exit 1 -fi - -rsltf="${rslt}.summary" -workf="${rslt}.detail" -cmdsf="${rslt}.script" -vmstatf="${rslt}.vmstat" -echo -n > $rsltf -echo -n > $workf - -declare -a vmstatpids - -# hide a little trick to unset this from the command line -if [ "$lustre_root" == " " ]; then - unset lustre_root -fi - -if [ -z "$lustre_root" ]; then - lctl=lctl -else - lctl=${lustre_root}/utils/lctl -fi - -remote_shell () { - host=$1 - shift - cmds="$*" - if [ "$host" = "localhost" -o "$host" = `uname -n` ]; then - eval "$cmds" - else - custom_remote_shell $host "$cmds" - fi -} - -obdecho_loaded() { - local host=$1 - remote_shell $host $lsmod | grep obdecho > /dev/null 2>&1 -} - -load_obdecho () { - local host=$1 - if [ -z "$lustre_root" ]; then - remote_shell $host $modprobe obdecho - elif [ -f ${lustre_root}/obdecho/obdecho.ko ]; then - remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.ko - else - remote_shell $host $insmod ${lustre_root}/obdecho/obdecho.o - fi -} - -unload_obdecho () { - local host=$1 - remote_shell $host $rmmod obdecho -} - -get_devno () { - local host=$1 - local type=$2 - local name=$3 - remote_shell $host $lctl device_list | \ - awk "{if (\$2 == \"UP\" && \$3 == \"$type\" && \$4 == \"$name\") {\ - print \$1; exit}}" -} - -get_ec_devno () { - local host=$1 - local client_name="$2" - local ost_name="$3" - if [ -z "$client_name" ]; 
then - if [ -z "$ost_name" ]; then - echo "client and ost name both null" 1>&2 - return - fi - client_name=${ost_name}_echo_client - fi - ec=`get_devno $host echo_client $client_name` - if [ -n "$ec" ]; then - echo $ec $client_name 0 - return - fi - if [ -z "$ost_name" ]; then - echo "no echo client and ost_name not set, client: $client_name, host: $host" 1>&2 - return - fi - ost=`get_devno $host obdfilter $ost_name` - if [ -z "$ost" ]; then - echo "OST $ost_name not setup" 1>&2 - return - fi - remote_shell $host "$lctl <<EOF - attach echo_client $client_name ${client_name}_UUID - setup $ost_name -EOF" - ec=`get_devno $host echo_client $client_name` - if [ -z "$ec" ]; then - echo "Can't setup echo client" 1>&2 - return - fi - echo $ec $client_name 1 -} - -teardown_ec_devno () { - local host=$1 - local client_name=$2 - remote_shell $host "$lctl <<EOF - cfg $client_name - cleanup - detach -EOF" -} - -create_objects () { - # create a set of objects, check there are 'n' contiguous ones and - # return the first or 'ERROR' - local host=$1 - local devno=$2 - local nobj=$3 - local rfile=$4 - remote_shell $host $lctl --device $devno create $nobj > $rfile 2>&1 - first=0 - prev=0 - count=0 - error=0 - while read line; do - echo "$line" | grep -q 'is object id' - if [ $? 
-ne 0 ]; then - continue - fi - if [ $first -eq 0 ]; then - first=$(echo $line | awk '{print $6}') - first=$(printf "%d" $first) - prev=$first - count=1 - else - obj=$(echo $line | awk '{print $6}') - obj=$(printf "%d" $obj) - diff=$((obj - (prev+1))) - if [ $diff -ne 0 ]; then - error=1 - fi - prev=$obj - count=$((count+1)) - fi - done < $rfile - if [ $nobj -ne $count ]; then - echo "ERROR: $nobj != $count" >&2 - cat $rfile >&2 - echo "ERROR" - elif [ $error -ne 0 ]; then - echo "ERROR: non contiguous objs found" >&2 - echo "ERROR" - else - echo $first - fi -} - -destroy_objects () { - local host=$1 - local devno=$2 - local obj0=$3 - local nobj=$4 - local rfile=$5 - remote_shell $host $lctl --device $devno destroy $obj0 $nobj > $rfile 2>&1 -} - -get_stats () { - local rfile=$1 - awk < $rfile \ - '/^Selected device [0-9]+$/ {n = 0; next}\ - /error/ {n = -1; exit}\ - /^[0-9]+\/[0-9]+ Total: [0-9]+\.[0-9]+\/second$/ {n++; v=strtonum($3); \ - if (n == 1 || v < min) min = v;\ - if (n == 1 || v > max) max = v;\ - next}\ - {if (n != 0) {n = -1; exit}}\ - END {printf "%d %f %f\n", n, min, max}' -} - -get_global_stats () { - local rfile=$1 - awk < $rfile 'BEGIN {n = 0;}\ - {n++; if (n == 1) {err = $1; min = $2; max = $3} else\ - {if ($1 < err) err = $1;\ - if ($2 < min) min = $2;\ - if ($3 > max) max = $3}}\ - END {if (n == 0) err = 0;\ - printf "%d %f %f\n", err, min, max}' -} - -testname2type () { - # 'x' disables data check - if ((verify)); then - x="" - else - x="x" - fi - case $1 in - *write*) echo "w$x";; - *) echo "r$x";; - esac -} - -print_summary () { - if [ "$1" = "-n" ]; then - minusn=$1; shift - else - minusn="" - fi - echo $minusn "$*" >> $rsltf - echo $minusn "$*" -} - -unique () { - echo "$@" | xargs -n1 echo | sort -u -} - -split_hostname () { - local name=$1 - case $name in - *:*) host=`echo $name | sed 's/:.*$//'` - name=`echo $name | sed 's/[^:]*://'` - ;; - *) host=localhost - ;; - esac - echo "$host $name" -} - -# split out hostnames from client/ost 
names -ndevs=${#client_names[@]} -if ((ndevs != 0)); then - if ((${#ost_names[@]} != 0)); then - echo "Please specify client_names or ost_names, but not both" 1>&2 - exit 1 - fi - for ((i=0; i<ndevs;i++)); do - str=(`split_hostname ${client_names[$i]}`) - host_names[$i]=${str[0]} - client_names[$i]=${str[1]} - done -else - ndevs=${#ost_names[@]} - if ((ndevs == 0)); then - echo "Please specify either client_names or ost_names" 1>&2 - exit 1 - fi - for ((i=0; i<ndevs;i++)); do - str=(`split_hostname ${ost_names[$i]}`) - host_names[$i]=${str[0]} - ost_names[$i]=${str[1]} - done -fi - -# get vmstat started -# disable LNET debug and get obdecho loaded on all relevant hosts -unique_hosts=(`unique ${host_names[@]}`) -pidcount=0 -for host in ${unique_hosts[@]}; do - remote_shell $host "echo 0 > /proc/sys/lnet/debug" - host_vmstatf=${vmstatf}_${host} - echo -n > $host_vmstatf - remote_shell $host "vmstat 5 >> $host_vmstatf" & - pid=$! - vmstatpids[$pidcount]=$pid - pidcount=$((pidcount+1)) - do_unload_obdecho[$host]=0 - if obdecho_loaded $host; then - continue - fi - load_obdecho $host - if obdecho_loaded $host; then - do_unload_obdecho[$host]=1 - continue - fi - echo "Can't load obdecho on $host" 1>&2 - exit 1 -done - -# get all the echo_client device numbers and names -for ((i=0; i<ndevs; i++)); do - host=${host_names[$i]} - devno=(`get_ec_devno $host "${client_names[$i]}" "${ost_names[$i]}"`) - if ((${#devno[@]} != 3)); then - exit 1 - fi - devnos[$i]=${devno[0]} - client_names[$i]=${devno[1]} - do_teardown_ec[$i]=${devno[2]} -done - -for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do - for ((nobj=$nobjlo;nobj<=$nobjhi;nobj*=2)); do - for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do - if ((thr % nobj)); then - continue - fi - # restart? 
- if [ -n "$restart_rsz" -a\ - -n "$restart_nobj" -a\ - -n "$restart_thr" ]; then - if ((rsz < restart_rsz ||\ - (rsz == restart_rsz &&\ - (nobj < restart_nobj ||\ - (nobj == restart_nobj &&\ - thr < restart_thr))))); then - continue; - fi - fi - # compute parameters - total_thr=$((ndevs*thr)) - total_nobj=$((ndevs*nobj)) - pages=$((rsz/PAGE_SIZE)) - actual_rsz=$((pages*PAGE_SIZE)) - count=$((size*1024/(actual_rsz*thr))) - actual_size=$((actual_rsz*count*thr)) - total_size=$((actual_size*ndevs)) - # show computed parameters - str=`printf 'ost %2d sz %8dK rsz %4d obj %4d thr %4d ' \ - $ndevs $total_size $actual_rsz $total_nobj $total_thr` - echo "=======================> $str" >> $workf - print_summary -n "$str" - if ((total_thr * actual_rsz > max_buffer_mem)); then - print_summary "Too much buffer space" - continue - fi - # create the objects - tmpf="${workf}_tmp" - for ((idx=0; idx < ndevs; idx++)); do - host=${host_names[$idx]} - devno=${devnos[$idx]} - client_name="${host}:${client_names[$idx]}" - echo "=============> Create $nobj on $client_name" >> $workf - first_obj=`create_objects $host $devno $nobj $tmpf` - cat $tmpf >> $workf - rm $tmpf - if [ $first_obj = "ERROR" ]; then - print_summary "created object #s on $client_name not contiguous" - exit 1 - fi - first_objs[$idx]=$first_obj - done - # run tests - for test in ${tests[@]}; do - declare -a pidarray - for host in ${unique_hosts[@]}; do - echo "starting run for test: $test rsz: $rsz threads: $thr objects: $nobj" >> ${vmstatf}_${host} - done - print_summary -n "$test " - # create per-host script files - for host in ${unique_hosts[@]}; do - echo -n > ${cmdsf}_${host} - done - for ((idx=0; idx < ndevs; idx++)); do - host=${host_names[$idx]} - devno=${devnos[$idx]} - tmpfi="${tmpf}_$idx" - first_obj=${first_objs[$idx]} - thr_per_obj=$((${thr}/${nobj})) - echo >> ${cmdsf}_${host} \ - "$lctl > $tmpfi 2>&1 \\ - --threads $thr -$snap $devno \\ - test_brw $count `testname2type $test` q $pages 
${thr_per_obj}t${first_obj} &" - done - pidcount=0 - for host in ${unique_hosts[@]}; do - echo "wait" >> ${cmdsf}_${host} - pidarray[$pidcount]=0 - pidcount=$((pidcount+1)) - done - # timed run of all the per-host script files - t0=`date +%s.%N` - pidcount=0 - for host in ${unique_hosts[@]}; do - # brutal hack to deal with a non-shared /tmp - scp -q ${cmdsf}_${host} ${host}:/tmp > /dev/null - remote_shell $host bash ${cmdsf}_${host} & - pidarray[$pidcount]=$! - pidcount=$((pidcount+1)) - done - pidcount=0 - for host in ${unique_hosts[@]}; do - wait ${pidarray[$pidcount]} - pidcount=$((pidcount+1)) - done - #wait - t1=`date +%s.%N` - # clean up per-host script files - for host in ${unique_hosts[@]}; do - rm ${cmdsf}_${host} - done - # compute bandwidth from total data / elapsed time - str=`awk "BEGIN {printf \"%7.2f \",\ - $total_size / (( $t1 - $t0 ) * 1024)}"` - print_summary -n "$str" - # collect/check individual OST stats - echo -n > $tmpf - for ((idx=0; idx < ndevs; idx++)); do - client_name="${host_names[$idx]}:${client_names[$idx]}" - tmpfi="${tmpf}_$idx" - echo "=============> $test $client_name" >> $workf - host="${host_names[$idx]}" - scp -q ${host}:$tmpfi $tmpfi > /dev/null - cat $tmpfi >> $workf - get_stats $tmpfi >> $tmpf - rm $tmpfi - done - # compute/display global min/max stats - echo "=============> $test global" >> $workf - cat $tmpf >> $workf - stats=(`get_global_stats $tmpf`) - rm $tmpf - if ((stats[0] <= 0)); then - if ((stats[0] < 0)); then - str=`printf "%17s " ERROR` - else - str=`printf "%17s " SHORT` - fi - else - str=`awk "BEGIN {printf \"[%7.2f,%7.2f] \",\ - (${stats[1]} * $actual_rsz)/1024,\ - (${stats[2]} * $actual_rsz)/1024; exit}"` - fi - print_summary -n "$str" - done - print_summary "" - # destroy objects we created - for ((idx=0; idx < ndevs; idx++)); do - host=${host_names[$idx]} - devno=${devnos[$idx]} - client_name="${host}:${client_names[$idx]}" - first_obj=${first_objs[$idx]} - echo "=============> Destroy $nobj on 
$client_name" >> $workf - destroy_objects $host $devno $first_obj $nobj $tmpf - cat $tmpf >> $workf - rm $tmpf - done - done - done -done - -# tear down any echo clients we created -for ((i=0; i<ndevs; i++)); do - host=${host_names[$i]} - if ((${do_teardown_ec[$i]})); then - teardown_ec_devno $host ${client_names[$i]} - fi -done - -# unload any obdecho modules we loaded -pidcount=0 -for host in ${unique_hosts[@]}; do - remote_shell $host "killall vmstat" & - pid=$! - kill -term ${vmstatpids[$pidcount]} - kill -kill ${vmstatpids[$pidcount]} 2>/dev/null - wait $pid - pidcount=$((pidcount+1)) - if ((${do_unload_obdecho[$host]})); then - unload_obdecho $host - fi -done - -exit 0 diff --git a/lustre-iokit/obdfilter-survey/obparse b/lustre-iokit/obdfilter-survey/obparse deleted file mode 100644 index 2b83b8d4787d63ea0ebd8ce5cb98246ce1128b51..0000000000000000000000000000000000000000 --- a/lustre-iokit/obdfilter-survey/obparse +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/perl -w - -# parses obdfilter output from our script goop -# arg 0 input filename -# arg 1 is 'w' 'r' - -$file = $ARGV[0]; - -$type = $ARGV[1]; -print "$file\n"; - - -open ( PFILE, "$file") or die "Can't open results"; -while ( <PFILE> ) { - chomp; - @line = split( /\s+/ ); - if ( $type eq 'w' ) { - # print "$line[5] $line[7] $line[9]\n"; - # if( $line[9]) { - $out{$line[7]}{$line[9]} = $line[11]; - # } - } elsif ( $type eq 'r' ) { - # if( $line[18]) { - # print "$line[5] $line[7] $line[18]\n"; - $out{$line[7]}{$line[9]} = $line[21]; - } else { - # if( $line[18]) { - # print "$line[5] $line[7] $line[18]\n"; - $out{$line[7]}{$line[9]} = $line[16]; - } -} - - foreach $crg ( sort { $a <=> $b } ( keys %out )) { - print "$crg"; - @list = ( sort { $a <=> $b } ( keys %{ $out{$crg} } )); - foreach $thr ( @list ) { - # These are the headers - print ",$thr"; - } - print "\n"; - print "$crg"; - foreach $tthr ( @list ) { - print ",$out{$crg}{$tthr}"; - } - print "\n"; - } - - diff --git 
a/lustre-iokit/ost-survey/Makefile.am b/lustre-iokit/ost-survey/Makefile.am deleted file mode 100644 index 5d2c14a07c06cfee6b06f03c80196428ee281f9b..0000000000000000000000000000000000000000 --- a/lustre-iokit/ost-survey/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -bin_SCRIPTS = ost-survey plot-ost -CLEANFILE = $(bin_SCRIPTS) -EXTRA_DIST = README.ost-survey ost-survey plot-ost diff --git a/lustre-iokit/ost-survey/README.ost-survey b/lustre-iokit/ost-survey/README.ost-survey deleted file mode 100644 index 1541939d8fba30c7e5bae54d41d5ec97da913351..0000000000000000000000000000000000000000 --- a/lustre-iokit/ost-survey/README.ost-survey +++ /dev/null @@ -1,26 +0,0 @@ -ost-survey.pl (OST performance survey) -====================================== - This script is designed to test the client-to-disk performance -of the individual OSTs in a Lustre filesystem. The network transfer -time from the client is included; to get a better idea of the isolated -disk perfomance, run this script on a client running on the OST. - -Syntax: - $ ost-survey [-h] [-s <size>] <lustre_path> - where -s : size in MB - -h : help - <lustre_path> : mount point of lustre client - -Assumptions - - Lustre filesystem is up and running - - Script is being run on a client - - -plot-ost.pl (OST survey graph) -====================================== - The plot-ost.pl script can be used to plot the results from the -ost-survey script using gnuplot. 
- -Syntax: $ ost-survey /mnt/lustre > ost_log - $ plot-ost.pl ost_log - diff --git a/lustre-iokit/ost-survey/ost-survey b/lustre-iokit/ost-survey/ost-survey deleted file mode 100755 index fc93117363c1242e2efa64a3a97c59ee0e6a36ce..0000000000000000000000000000000000000000 --- a/lustre-iokit/ost-survey/ost-survey +++ /dev/null @@ -1,271 +0,0 @@ -#!/usr/bin/perl -# This script is to be run on a client machine and will test all the -# OSTs to determine which is the fastest and slowest -# The current test method is as follows: -# -Create a directory for each OST -# -Use 'lfs setstripe' to set the Lustre striping such that IO goes to -# only one OST -# -Use 'dd' to write and read a file of a specified size -# -Compute the average, and Standard deviation -# -Find the slowest OST for read and write -# -Find the Fastest OST for read and write - -# GLOBALS -$pname = $0; # to hold program name -$OSTS = 0; # Number of OSTS we will loop over -$BSIZE = 1024 * 1024; # Size of i/o block -$MNT = "/mnt/lustre"; # Location of Lustre file system -$FSIZE = 30; # Number of i/o blocks - -# Usage -sub usage () { - print "Usage: $pname [-s <size>] [-h] <Lustre_Path>\n"; - print "[OPTIONS]\n"; - print " -s: size of test file in MB (default $FSIZE MB)\n"; - print " -h: To display this help\n"; - print "example : $pname /mnt/lustre\n"; - exit 1; -} - -# ost_count subroutine ets globle variable $OST with Number of OST's -# Also fills 1 for active OST indexes in ACTIVEOST_INX array. 
-sub ost_count () { - # numobd gives number of ost's and activeobd gives number of active ost's - my $tempfile = glob ("/proc/fs/lustre/lov/*-clilov-*/activeobd"); - open(PTR, $tempfile) || die "Cannot open $tempfile: $!\n"; - $OSTS = <PTR>; - close PTR; - print "Number of Active OST devices : $OSTS"; - my $tempfile = glob ("/proc/fs/lustre/lov/*-clilov-*/numobd"); - open(PTR, $tempfile) || die "Cannot open $tempfile: $!\n"; - $numost = <PTR>; - close PTR; - if ( $numost != $OSTS ) { - printf "Number of non active ots(s): %d\n", ( $numost - $OSTS ); - $OSTS = $numost; - } - my $tempfile = glob ("/proc/fs/lustre/lov/*-clilov-*/target_obd"); - open(PTR, $tempfile) || die "Cannot open $tempfile: $!\n"; - my $count = 0; - my $temp; - while (<PTR>) { - chop; - my ($ost_num, $ost_name, $ost_status) = split(/\s+/, $_); - if ( $ost_status eq "ACTIVE" ) { - $ACTIVEOST_INX[$count] = 1; - } - $count++; - } -} - -sub cache_off () { - $CACHEFILE = glob ("/proc/fs/lustre/llite/*/max_cached_mb"); - open(PTR, $CACHEFILE) || die "Cannot open $tempfile: $!\n"; - $CACHESZ = 0 + <PTR>; - close PTR; - system("echo 0 >> $CACHEFILE"); -} - -sub cache_on () { - system("echo $CACHESZ >> $CACHEFILE"); -} - -# make_dummy subroutine creates a dummy file that will be used for read operation. -sub make_dummy () { - my $SIZE = $_[0]; - my $tempfile = $_[1]; - system ("dd of=$tempfile if=/dev/zero count=$SIZE bs=$BSIZE 2> /dev/null"); -} - -# run_test subroutine actually writes and reads data to/from dummy file -# and compute corresponding time taken for read and write operation and -# byte transfer for the both operations. -# This subroutine also fill corresponding globle arrays with above information. 
-sub run_test () { - my $SIZE = $_[0]; - my $INX=$_[1]; - my $ACTION=$_[2]; - my $tempfile = $_[3]; - - if ( !(-f $tempfile) && $ACTION eq "read" ) { - &make_dummy($SIZE, $tempfile); - } - system("sync"); - my ($ts0, $tu0) = gettimeofday(); - $tu0 = $ts0 + ($tu0 / 1000000); - if ( $ACTION eq "write" ) { - system("dd of=$tempfile if=/dev/zero count=$SIZE bs=$BSIZE 2> /dev/null"); - } elsif ( $ACTION eq "read" ) { - system("dd if=$tempfile of=/dev/null count=$SIZE bs=$BSIZE 2> /dev/null"); - } else { - print "Action is neither read nor write\n"; - exit 1; - } - system("sync"); - my ($ts1, $tu1) = gettimeofday(); - $tu1 = $ts1 + ($tu1/1000000); - my $tdelta = $tu1 - $tu0; - my $delta = ($SIZE * $BSIZE / ( $tu1 - $tu0 )) / (1024 * 1024); - if ( $ACTION eq "write" ) { - $wTime[$INX] = $tdelta; - $wMBs[$INX] = $delta; - } else { - $rTime[$INX] = $tdelta; - $rMBs[$INX] = $delta; - } -} - -# calculate subroutine compute following things and displays them. -# - Finds worst and best OST for both read and write operations. 
-# - Compute average of read and write rate from all OSTS -# - Compute Standard deviation for read and write form all OST's -sub calculate () { - my ($op, $MBs); - $op = $_[0]; - @MBs = @_[1..$#_]; - my $count = 0; - my $total = 0; - my $avg = 0; - my $sd = 0; - my $best_OST = 0; - my $worst_OST = 0; - my $max_mb = 0; - my $min_mb = 999999999; - while ($count < $OSTS ) { - if ( $ACTIVEOST_INX[$count] ) { - $total = $total + $MBs[$count]; - if ($max_mb < $MBs[$count] ) { - $max_mb = $MBs[$count]; - $best_OST = $count; - } - if ($min_mb > $MBs[$count] ) { - $min_mb = $MBs[$count]; - $worst_OST = $count; - } - } - $count++; - } - $avg = $total/$OSTS; - $total = 0; - $count = 0; - while ($count < $OSTS ) { - if ( $ACTIVEOST_INX[$count] ) { - $total = $total + ($MBs[$count] - $avg) * ($MBs[$count] - $avg); - } - $count++; - } - $sd = sqrt($total/$OSTS); - printf "Worst %s OST indx: %d speed: %f\n", $op, $worst_OST, $min_mb; - printf "Best %s OST indx: %d speed: %f\n", $op, $best_OST, $max_mb; - printf "%s Average: %f +/- %f MB/s\n", $op, $avg, $sd; -} - -# output_all_data subroutine displays speed and time information -# for all OST's for both read and write operations. 
-sub output_all_data () { - my $count = 0; - print "Ost# Read(MB/s) Write(MB/s) Read-time Write-time\n"; - print "----------------------------------------------------\n"; - while ( $count < $OSTS ) { - if ( $ACTIVEOST_INX[$count] ) { - printf "%d %.3f %.3f %.3f %.3f\n",$count, - $rMBs[$count], $wMBs[$count], $rTime[$count], $wTime[$count]; - } else { - printf "%d Inactive ost\n",$count; - } - $count = $count + 1; - } -} - -@rTime = (); -@wTime = (); -@rMBs = (); -@wMBs = (); -@ACTIVEOST_INX; - -# Locals -my $filename = ""; -my $dirpath = ""; -my $flag = 0; - -# Command line parameter parsing -use Getopt::Std; -getopts('s:h') or usage(); -usage() if $opt_h; -$FSIZE = $opt_s if $opt_s; - -my $i = 0; -foreach (@ARGV) { - $MNT = $_; - $i++; - if ($i > 1) { - print "ERROR: extra argument $_\n"; - usage(); - } -} -#Check for Time::HiRes module -my $CheckTimeHiRes = "require Time::HiRes"; -eval ($CheckTimeHiRes) or die "You need to install the perl-Time-HiRes package to use this script\n"; -my $LoadTimeHiRes = "use Time::HiRes qw(gettimeofday)"; -eval ($LoadTimeHiRes); - -use POSIX qw(strftime); -my $time_v = time(); -my $hostname = `lctl list_nids | head -1` or die "You need to install lctl to use this script\n"; -chop($hostname); -print "$pname: ", strftime("%D", localtime($time_v)); -print " OST speed survey on $MNT from $hostname\n"; - -# get OST count -ost_count (); -# turn off local cache -cache_off (); - -$dirpath = "$MNT/ost_survey_tmp"; -eval { mkpath($dirpath) }; -if ($@) { - print "Couldn't create $dirpath: $@"; - exit 1; -} - -use File::Path; -$CNT = 0; -while ($CNT < $OSTS) { - $filename = "$dirpath/file$CNT"; - if ( $ACTIVEOST_INX[$CNT] ) { - # set stripe for OST number $CNT - system ("lfs setstripe $filename 0 $CNT 1"); - # Perform write for OST number $CNT - &run_test($FSIZE,$CNT,"write",$filename); - $flag++; - } - $CNT = $CNT + 1; -} -$CNT = 0; -while ($CNT < $OSTS) { - $filename = "$dirpath/file$CNT"; - if ( $ACTIVEOST_INX[$CNT] ) { - # Perform read for 
OST number $CNT - &run_test($FSIZE,$CNT,"read",$filename); - $flag++; - } - $CNT = $CNT + 1; -} - -# if read or write performed on any OST then display information. -if ( $flag ) { - if ( $flag > 1 ) { - &calculate("Read",@rMBs); - &calculate("Write",@wMBs); - } - output_all_data (); -} else { - print "There is no active OST's found\n"; -} - -cache_on (); - -eval { rmtree($dirpath) }; -if ($@) { - print "Warning: Couldn't remove $dirpath: $@"; -} diff --git a/lustre-iokit/ost-survey/plot-ost b/lustre-iokit/ost-survey/plot-ost deleted file mode 100755 index a16a2a161a2357221fee80b48ac83f6b808be3d0..0000000000000000000000000000000000000000 --- a/lustre-iokit/ost-survey/plot-ost +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/perl -w -# Report generation for ost-survey.pl -# =================================== -# The plot-ost.pl script is used to generate csv file and -# instructions files for gnuplot from the output of ost-survey.pl script. -# -# The plot-ost.pl also creates .scr file that contains instructions -# for gnuplot to plot the graph. After generating .dat and .scr files this -# script invokes gnuplot to display graph. -# -# Syntax: -# $ plot-ost.pl <log_filename> -# Note: 1. This script may need modifications whenever there will be -# modifications in output format of ost-survey.pl script. -# 2. Gnuplot version 4.0 or above is required. - -# arg 0 is filename -sub usages_msg(){ - print "Usage: $0 <log_filename> \n"; - print " $0 produces graphs from the output of ost-survey.pl\n"; - print " using gnuplot.\n"; - print "e.g.# perl ost-survey /mnt/lustre > ost-log; perl $0 ost-log\n"; - exit 1; -} - -my $count = 0; # count for number of rows in csv(.dat) file. -my @line; # To store recently read line from log file -my $flag = 0; -my @GraphTitle; -if ( !$ARGV[0] ) { - usages_msg(); -} - -$file = $ARGV[0]; -# Open log file for reading -open ( PFILE, "$file") or die "Can't open results log file"; -# Open .csv file for writting required columns from log file. 
-open ( DATAFILE, "> $file.dat" ) or die "Can't open csv file for writting"; -LABLE:while ( <PFILE> ) { - chomp; - @line = split( /\s+/ ); # splits line into tokens - # This comparison may be changed if there will be changes log file. - if ( $line[0] eq "Ost#" ) { - print DATAFILE "$line[0] $line[1] $line[2]\n"; - $flag = 1; - <PFILE>; # skip the "---------" line from result file. - last LABLE; - } - if ($line[2] eq "OST" && $line[3] eq "speed") { - @GraphTitle = @line; - @GraphTitle = split( /:/ ); - } -} -if ( !$flag) { - print "Invalid logfile format\n"; - exit 1; -} -while ( <PFILE> ) { - chomp; - @line = split( /\s+/ ); # splits line into tokens - if ( $line[1] ne "Inactive" ) { - print DATAFILE "$count $line[1] $line[2]\n"; - } - $count = $count + 1; -} -close PFILE; -close DATAFILE; -# Open .scr file for writting instructions for gnuplot. -open ( SCRFILE, "> $file.scr" ) or die "Can't open scr file for writting"; -# generate instructions for gnuplot. decide axes depends on ranges in @columnvalues -print SCRFILE "set title \"$GraphTitle[1]\"\n"; -print SCRFILE "set xlabel \"OST index\"\n"; -print SCRFILE "set ylabel \"MB/s\"\n"; -print SCRFILE "set boxwidth 0.2\n"; -print SCRFILE "plot \"$file.dat\" using 1:2 axes x1y1 title \"Read(MB/s)\" with boxes fs solid 0.7\n"; -print SCRFILE "replot \"$file.dat\" using (\$1 + 0.2):3 axes x1y1 title \"Write(MB/s)\" with boxes fs solid 0.7\n"; -print SCRFILE "pause -1\n"; -close SCRFILE; -# invoke gnuplot to display graph. 
-system ("gnuplot $file.scr") == 0 or die "ERROR: while ploting graph.\nMake sure that gnuplot is working properly"; diff --git a/lustre-iokit/sgpdd-survey/Makefile.am b/lustre-iokit/sgpdd-survey/Makefile.am deleted file mode 100644 index bc568abb000841c885186d88d65a9bd061a38e63..0000000000000000000000000000000000000000 --- a/lustre-iokit/sgpdd-survey/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -bin_SCRIPTS = parse-sgpdd sgpdd-survey -CLEANFILE = $(bin_SCRIPTS) -EXTRA_DIST = README.sgpdd-survey parse-sgpdd sgpdd-survey diff --git a/lustre-iokit/sgpdd-survey/README.sgpdd-survey b/lustre-iokit/sgpdd-survey/README.sgpdd-survey deleted file mode 100644 index 4b95eda10f169b1d6b291c04e208c02890757142..0000000000000000000000000000000000000000 --- a/lustre-iokit/sgpdd-survey/README.sgpdd-survey +++ /dev/null @@ -1,91 +0,0 @@ -WARNING: Running sgp_dd will ERASE the contents of the disk devices. - This is NOT to be run on any OST where you care about any data - or you are not expecting to reformat the filesystem afterward. - -Requirements ------------- - -. sg3_utils (for sgp_dd) - SCSI device - Or, if using non-scsi disk - raw device support - sg3_utils - - -Overview --------- - -This survey may be used to characterise the performance of a SCSI device. -It simulates an OST serving multiple stripe files. The data gathered by it -can help set expectations for the performance of a lustre OST exporting the -device. - -The script uses sgp_dd to do raw sequential disk I/O. It runs with -variable numbers of sgp_dd threads to show how performance varies with -different request queue depths. - -The script spawns variable numbers of sgp_dd instances, each reading or -writing a separate area of the disk to show how performance varies with the -number of concurrent stripe files. 
- -The device(s) used must meet one of two tests: -SCSI device: - Must appear in the output of 'sg_map' - (make sure the kernel module "sg" is loaded) -Raw device: - Must appear in the output of 'raw -qa' - - If you need to create raw devices in order to use this tool, note that - raw device 0 can not be used due to a bug in certain versions of the - "raw" utility (including that shipped with RHEL4U4.) - -You may not mix raw and SCSI devices in the test specification. - - -Running -------- - -The script must be customised according to the particular device under test -and where it should keep its working files. Customisation variables are -described clearly at the start of the script. - -e.g.: scsidevs=/dev/sda size=128 crghi=16 thrhi=32 ./sgpdd-survey - -When the script runs, it creates a number of working files and a pair of -result files. All files start with the prefix given by ${rslt}. - -${rslt}_<date/time>.summary same as stdout -${rslt}_<date/time>_* tmp files -${rslt}_<date/time>.detail collected tmp files for post-mortem - -The summary file and stdout contain lines like... - -total_size 8388608K rsz 1024 thr 1 crg 1 180.45 MB/s 1 x 180.50 = 180.50 MB/s - -The number immediately before the first MB/s is the bandwidth computed by -measuring total data and elapsed time. The other numbers are a check on -the bandwidths reported by the individual sgp_dd instances. - -If there are so many threads that sgp_dd is unlikely to be able to allocate -I/O buffers, "ENOMEM" is printed. - -If not all the sgp_dd instances successfully reported a bandwidth number -"failed" is printed. - - -Visualising Results -------------------- - -I've found it most useful to import the summary data (it's fixed width) -into Excel (or any graphing package) and graph bandwidth v. # threads for -varying numbers of concurrent regions. This shows how the device performs -with varying queue depth. 
If the series (varying numbers of concurrent -regions) all seem to land on top of each other, it shows the device is -phased by seeks at the given record size. - - -The included script "parse.pl" will process output files and create -.csv files for spreadsheet import - -The "plot-sgpdd.pl" script plots the results directly using gnuplot. - diff --git a/lustre-iokit/sgpdd-survey/parse-sgpdd b/lustre-iokit/sgpdd-survey/parse-sgpdd deleted file mode 100644 index 84b222527cc10f578a95c27365d732f4d5728865..0000000000000000000000000000000000000000 --- a/lustre-iokit/sgpdd-survey/parse-sgpdd +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/perl -w - -# arg 0 is filename -# arg 1 is 'w' or 'r' for reading or writing - -$file = $ARGV[0]; - -$type = $ARGV[1]; -print "$file\n"; - - -open ( PFILE, "$file") or die "Can't open results"; -while ( <PFILE> ) { - chomp; - @line = split( /\s+/ ); - if ( $type eq 'w' ) { - # print "$line[5] $line[7] $line[9]\n"; - if( $line[9]) { - $out{$line[5]}{$line[7]} = $line[9]; - } - } else { - if( $line[18]) { - # print "$line[5] $line[7] $line[18]\n"; - $out{$line[5]}{$line[7]} = $line[18]; - } - } -} - - foreach $crg ( sort { $a <=> $b } ( keys %out )) { - print "$crg"; - @list = ( sort { $a <=> $b } ( keys %{ $out{$crg} } )); - foreach $thr ( @list ) { - # These are the headers - print ",$thr"; - } - print "\n"; - print "$crg"; - foreach $tthr ( @list ) { - print ",$out{$crg}{$tthr}"; - } - print "\n"; - } - - diff --git a/lustre-iokit/sgpdd-survey/plot-sgpdd b/lustre-iokit/sgpdd-survey/plot-sgpdd deleted file mode 100755 index 85aa38828a70bea46f22e14e9756e726bdfb94bb..0000000000000000000000000000000000000000 --- a/lustre-iokit/sgpdd-survey/plot-sgpdd +++ /dev/null @@ -1,253 +0,0 @@ -#!/usr/bin/perl -w -# Report generation for plot-sgpdd -# ================================ -# The plot-sgpdd script is used to generate csv file and -# instructions files for gnuplot from the output of sgpdd-survey.pl script. 
-# -# The plot-sgpdd also creates .scr file that contains instructions -# for gnuplot to plot the graph. After generating .dat and .scr files this -# script invokes gnuplot to display graph. -# -# Syntax: -# $ sgpdd-survey > log_filename -# $ plot-sgpdd <log_filename> -# [Note: 1. This script may need modifications whenever there will be -# modifications in output format of sgpdd-survey.pl script. -# 2. Gnuplot version 4.0 or above is required.] - -sub usage() -{ - print STDERR "Usage: $0 [options] <log_filename>\n"; - print STDERR " $0 parses and plots graphs from the output of sgpdd-survey.pl\n"; - print STDERR " It generates text data files (.dat) and graphs (.png) using gnuplot.\n"; - print STDERR "options:\n"; - print STDERR " --rt: Subtitle for read graphs\n"; - print STDERR " --wt: Subtitle for write graphs\n"; - print STDERR " --y: Y-axis scale\n"; - print STDERR "e.g. # $0 --rt=\"no prefetch\" --wt=\"WB disabled\" --y=500 sgpdd.summary\n"; - exit 1; -} - -# check whether gnuplot exists? -system ("which gnuplot > /dev/null") == 0 or die "gnuplot does not exist, please install it and try again.\n"; - -my @GraphTitle; - -#Subroutine to write .scr file that further used by gnuplot to plot the graph. 
-sub write_scr_file() { - my $op = $_[0]; - print "generating plot $file-$rsz-$op.png\n"; - open ( SCRFILE, "> $file-$rsz-$op.scr" ) or die "Can't open scr file for writing"; - if ($op eq "rd") { - $rwlabel = "Read"; - } - if ($op eq "wr") { - $rwlabel = "Write"; - } - - if ($opt_rdtitle || $opt_wrtitle) { - if ($op eq "rd") { - print SCRFILE "set title \"@GraphTitle\\n$rwlabel, Rsize = $rsz, $opt_rdtitle\"\n"; - } - if ($op eq "wr") { - print SCRFILE "set title \"@GraphTitle\\n$rwlabel, Rsize = $rsz, $opt_wrtitle\"\n"; - } - } else { - print SCRFILE "set title \"@GraphTitle\\n$rwlabel, Rsize = $rsz\"\n"; - } - print SCRFILE "set xlabel \"Threads\"\n"; - print SCRFILE "set ylabel \"Speeds(MB/s)\"\n"; - print SCRFILE "set logscale x\n"; - print SCRFILE "set grid\n"; - if ($opt_y != 0) { - print SCRFILE "set yrange [ 0:$opt_y ]\n"; - } else { - print SCRFILE "set yrange [ 0: ]\n"; - } - - my $plot = "plot"; - $i = 2; - $xrange = 1; - # generate instructions for gnuplot, with adjusting X-axes ranges - for ($j = 1; $j <= $thread ; $j = $j + $j) { - if ($op eq "wr") { - printf SCRFILE "$plot \"$file-$rsz-$op.dat\" using 1:$i axes x%dy1 title \"write$j\" with line\n", $xrange; - } - if ($op eq "rd") { - printf SCRFILE "$plot \"$file-$rsz-$op.dat\" using 1:$i axes x%dy1 title \"read$j\" with line\n", $xrange; - } - $i++; - $plot = "replot"; - } - print SCRFILE "set terminal png\n"; - print SCRFILE "set output \"$file-$rsz-$op.png\"\n"; - print SCRFILE "replot\n"; - close SCRFILE; - # invoke gnuplot to display graph. - system ("gnuplot $file-$rsz-$op.scr") == 0 or die "ERROR: while ploting graph"; - system ("rm $file-$rsz-$op.scr"); -} - -#Subroutine to write .dat file that further used by gnuplot to plot the graph. -sub write_dat_file() { - my $op = $_[0]; - print "writing data $file-$rsz-$op.dat\n"; - # Open .csv/.dat file for writing required columns from log file. 
- open ( DATAFILE, "> $file-$rsz-$op.dat" ) or die "Can't open csv file for writing"; - printf DATAFILE "%-6s", "0"; - for ($j = 1; $j <= $thread ; $j = $j + $j) { - printf DATAFILE "%-8s", "$op$j"; - } - for ( $i = 1; $i <= $region; $i = $i + $i ) { - printf DATAFILE "\n%-6s", $i; - for ($j = 1; $j <= $thread ; $j = $j + $j) { - if (($op eq "rd" && $rdwr) || ($op eq "wr" && $wrrd) || ($readop) || ($writeop)) { - if ( $out{$i}{$j} ) { - printf DATAFILE "%-8s", $out{$i}{$j}; - } else { - printf DATAFILE "%-8s", "-"; - } - } else { - if (($j <= 1 && $out{$i}{$j - 1})) { - printf DATAFILE "%-8s", $out{$i}{$j - 1}; - }elsif ($out{$i}{$j + 1} && $j > 1) { - printf DATAFILE "%-8s", $out{$i}{$j + 1}; - } else { - printf DATAFILE "%-8s", "-"; - } - } - } - } - close DATAFILE; -} - -if ( !$ARGV[0] ) { - usage(); -} -$region = 0; -$thread = 0; -$count = 0; -$wrrd = 0; -$rdwr = 0; -$writeop = 0; -$readop = 0; -$rsz = 0; -$opt_rdtitle = ""; -$opt_wrtitle = ""; -$opt_y = 0; -# Command line parameter parsing -use Getopt::Long; -GetOptions ('help' => \$opt_help, 'rt=s' => \$opt_rdtitle, 'wt=s' => \$opt_wrtitle, 'y=i' => \$opt_y) or usage(); -if ($opt_help) { - usage(); -} -$file = $ARGV[0]; - -open ( PFILE, "$file") or die "Can't open $file"; -LABEL: while ( <PFILE> ) { - chomp; - @line = split( /\s+/ ); - if ($line[27]) { - print "invalid file format\n"; - exit 1; - } - if ($count == 0) { - @GraphTitle = @line; - $count++; - next LABEL; - } - if ($line[8]) { - if ($line[8] eq "ENOMEM") { - next LABEL; - } - } - if (!$rsz && $line[3]) { - $rsz = $line[3]; - } - if ($rsz != $line[3]) { - if($readop) { - &write_dat_file("rd"); - &write_scr_file("rd"); - } - if($writeop) { - &write_dat_file("wr"); - &write_scr_file("wr"); - } - if ($wrrd || $rdwr) { - &write_dat_file("rd"); - &write_scr_file("rd"); - &write_dat_file("wr"); - &write_scr_file("wr"); - } - $rsz = $line[3]; - $region = 0; - $thread = 0; - } - #print "rg$line[5] th$line[7] w$line[9] r$line[$rindex]\n"; - $rindex = 18; - 
if ($line[18]) { - if ($line[10] eq "failed") { - $rindex = 12; - } - if ($line[8] eq "write" && $line[17] eq "read") { - $wrrd = 1; - } - if ($line[8] eq "read" && $line[17] eq "write") { - $rdwr = 1; - } - } else { - if ($line[8] eq "write" && $line[9]) { - $writeop = 1; - } - if ($line[8] eq "read" && $line[9]) { - $readop = 1; - } - - } - if ($wrrd || $rdwr) { - $out{$line[7]}{$line[5]} = $line[9]; - if ($line[$rindex+1]) { - if (!($line[$rindex+1] eq "failed")) { - goto LABEL2; - } - } else { -LABEL2: if ($line[5] <= 1 ) { - $out{$line[7]}{$line[5] - 1} = $line[$rindex]; - } else { - $out{$line[7]}{$line[5] + 1} = $line[$rindex]; - } - } - } - if ($writeop) { - $out{$line[7]}{$line[5]} = $line[9]; - } - if ($readop) { - $out{$line[7]}{$line[5]} = $line[9]; - } - if ( $region < $line[7] ) { - $region = $line[7]; - } - if ( $thread < $line[5] ) { - $thread = $line[5]; - } - $count++; -} -close PFILE; -if ($count > 1 && $rsz) { - if($readop) { - &write_dat_file("rd"); - &write_scr_file("rd"); - } - if($writeop) { - &write_dat_file("wr"); - &write_scr_file("wr"); - } - if ($wrrd || $rdwr) { - &write_dat_file("rd"); - &write_scr_file("rd"); - &write_dat_file("wr"); - &write_scr_file("wr"); - } -} else { - print "Invalid log file format\n"; -} diff --git a/lustre-iokit/sgpdd-survey/sgpdd-survey b/lustre-iokit/sgpdd-survey/sgpdd-survey deleted file mode 100755 index e6878af22ad6299c1170b9e0dc3d715bfd9d33f8..0000000000000000000000000000000000000000 --- a/lustre-iokit/sgpdd-survey/sgpdd-survey +++ /dev/null @@ -1,178 +0,0 @@ -#!/bin/bash - -###################################################################### -# customize per survey - -# CHOOSE EITHER scsidevs or rawdevs -# the SCSI devices to measure - WARNING: will be erased. -# The raw devices to use -# rawdevs=${rawdevs:-"/dev/raw/raw1"} -# scsidevs=`ls /dev/sd[a-z] /dev/sd[a-z][a-z]` # all devices, if you use udev - -# result file prefix. 
date/time+hostname makes unique -# NB ensure the path exists if it includes subdirs -rslt=${rslt:-"/tmp/sgpdd_survey_`date +%F@%R`_`uname -n`"} - -# what to do (read or write) -actions=${actions:-"write read"} - -# total size per device (MBytes) -# NB bigger than device cache is good -size=${size:-8192} - -# record size (KBytes) -rszlo=${rszlo:-1024} -rszhi=${rszhi:-1024} - -# Concurrent regions per device -crglo=${crglo:-1} -crghi=${crghi:-256} - -# threads to share between concurrent regions per device -# multiple threads per region simulates a deeper request queue -# NB survey skips over #thr < #regions and #thr/#regions > SG_MAX_QUEUE -thrlo=${thrlo:-1} -thrhi=${thrhi:-4096} - -##################################################################### -# leave the rest of this alone unless you know what you're doing... - -# sgp_dd's idea of disk sector size (Bytes) -bs=512 -# and max # threads one instance will spawn -SG_MAX_QUEUE=16 - -# map given device names into SG device names -i=0 -devs=() -if [ "$scsidevs" ]; then - # we will test for a LUN, the test for a partition - # if the partition number is > 9 this will fail - for d in $scsidevs; do - devs[$i]=`sg_map | awk "{if ($ 2 == \"$d\") print $ 1}"` - if [ -z "${devs[i]}" ]; then - echo "Can't find SG device for $d, testing for partition" - pt=`echo $d | sed 's/[0-9]$//'` - # Try again - devs[$i]=`sg_map | awk "{if ($ 2 == \"$pt\") print $ 1}"` - if [ -z "${devs[i]}" ]; then - echo "Can't find SG device $pt" - exit 1 - fi - fi - i=$((i+1)) - done -elif [ "$rawdevs" ]; then - for r in $rawdevs; do - RES=`raw -q $r` - if [ $? 
-eq 0 ];then - devs[$i]=$r - i=$((i+1)) - else - echo "Raw device $r not set up" - exit 1 - fi - done -else - echo "Must specify scsidevs or rawdevs" - exit 1 -fi - -ndevs=${#devs[@]} - -rsltf=${rslt}.summary -workf=${rslt}.detail -echo -n > $rsltf -echo -n > $workf - -print_summary () { - if [ "$1" = "-n" ]; then - minusn=$1; shift - else - minusn="" - fi - echo $minusn "$*" >> $rsltf - echo $minusn "$*" -} - -print_summary "$(date) sgpdd-survey on $rawdevs$scsidevs from $(hostname)" - -for ((rsz=$rszlo;rsz<=$rszhi;rsz*=2)); do - for ((crg=$crglo;crg<=$crghi;crg*=2)); do - for ((thr=$thrlo;thr<=$thrhi;thr*=2)); do - if ((thr < crg || thr/crg > SG_MAX_QUEUE)); then - continue - fi - # compute parameters - bpt=$((rsz*1024/bs)) - blocks=$((size*((1024*1024)/bs)/crg)) - count=$blocks - # show computed parameters - actual_rsz=$((bpt*bs/1024)) - actual_size=$((bs*count*crg/1024)) - str=`printf 'total_size %8dK rsz %4d crg %5d thr %5d ' \ - $((actual_size*ndevs)) $actual_rsz $((crg*ndevs)) $((thr*ndevs))` - echo "==============> $str" >> $workf - print_summary -n "$str" - freemem=`awk < /proc/meminfo '/^MemTotal:/ {printf "%d\n", $2}'` - if (((actual_rsz*thr/crg + 64)*crg*ndevs > freemem)); then - print_summary "ENOMEM" - continue - fi - # run tests - for action in $actions; do - print_summary -n "$action " - echo "=====> $action" >> $workf - tmpf=${workf}_tmp - # start test - t0=`date +%s.%N` - for ((i=0;i<ndevs;i++)); do - dev=${devs[i]} - if [ $action = read ]; then - inf="if=$dev" - outf="of=/dev/null" - skip=skip - else - inf="if=/dev/zero" - outf="of=$dev" - skip=seek - fi - for ((j=0;j<crg;j++)); do - sgp_dd 2> ${tmpf}_${i}_${j} \ - $inf $outf ${skip}=$((1024+j*blocks)) \ - thr=$((thr/crg)) count=$count bs=$bs bpt=$bpt time=1& - done - done - wait - t1=`date +%s.%N` - # collect/check individual stats - echo > $tmpf - ok=0 - for ((i=0;i<ndevs;i++)); do - for ((j=0;j<crg;j++)); do - rtmp=${tmpf}_${i}_${j} - if grep 'time to transfer data' $rtmp > /dev/null 2>&1; 
then - ok=$((ok + 1)) - fi - cat ${rtmp} >> $tmpf - cat ${rtmp} >> $workf - rm ${rtmp} - done - done - if ((ok != ndevs*crg)); then - print_summary -n "$((ndevs*crg - ok)) failed " - else - # compute MB/sec from elapsed - bw=`awk "BEGIN {printf \"%7.2f MB/s\", $actual_size * $ndevs / (( $t1 - $t0 ) * 1024); exit}"` - # compute MB/sec from nregions*slowest - check=`awk < $tmpf \ - '/time to transfer data/ {mb=$8/1.048576; if (n == 0 || mb < min) min = mb; n++}\ - END {printf "%5d x %6.2f = %7.2f MB/s", n, min, min * n}'` - print_summary -n "$bw $check " - fi - rm $tmpf - done - print_summary "" - done - done -done diff --git a/lustrecvs b/lustrecvs deleted file mode 100755 index 8023a34bc737850ab56529bae2082694b626a868..0000000000000000000000000000000000000000 --- a/lustrecvs +++ /dev/null @@ -1,137 +0,0 @@ -#!/bin/bash - -LC_COLLATE="C" -progname="${0##*/}" - -warn () -{ - [ "$1" ] && echo >&2 - [ "$1" ] && echo "$progname: $1" >&2 - [ "$1" ] && echo >&2 -} - -fatal () -{ - warn "$2" - exit "$1" -} - -usage () -{ - cat <<EOF -Usage: $progname <lustretag> <pindate> - where <lustretag> is a tag of the lustre-core module - and <pindate> is an optional quoted timestamp suitable for cvs -D -EOF -} - -if [ -z "$LUSTRECVS_UPDATED" ] ; then - echo "$progname: updating lustrecvs" - cvs up -l || fatal 1 "Error updating lustrecvs" - export LUSTRECVS_UPDATED=yes - exec "$0" "$@" -fi - -[ "$1" = "-r" ] && shift - -buildtag="HEAD" -lustretag="$1" -shift -pindate=$1 -shift - -if [ "$*" ] ; then - usage >&2 - exit 1 -fi - -case "$lustretag" in - '') - warn "a lustretag is required." - usage >&2 - exit 1 - ;; - --help | -h) - usage - exit 0 - ;; - - # this is the branch table - # keep this list sorted alphabetically! 
- - # These use special build directories - - b1_4*) buildtag="b1_4" ;; - - b_release_1_4_6-patchless) buildtag="b1_4" ;; - b_release_1_4_7-test) buildtag="b_release_1_4_7" ;; - - b_release*) buildtag=$lustretag ;; - - # These releases did not get build tagged for them because they - # this build system didn't exist when they were tagged - v1_2_8|v1_4_0) - buildtag="b1_4" - ;; - - v*) buildtag=$lustretag ;; - - # this is the branch table - # keep this list sorted alphabetically! - - *) - buildtag="HEAD" - ;; -esac - -error_modules= -cvs_cmd () -{ - dir="$1" - module="$2" - tag="$3" - cotag="" - update="" - - if [ "$tag" = "HEAD" ] ; then - cotag="" - uptag="-A" - elif [ "$tag" ] ; then - cotag="-r $tag" - uptag="-r $tag" - else - # silently skip if no tag was specified - return - fi - - # create a cvs date format that will survive shell expansion - if [ -n "$pindate" ]; then - datecmd=$(date -u +%s -d "$pindate") - datecmd="-D @$datecmd" - else - datecmd="" - fi - - if [ -d "$dir" ] ; then - echo "$progname: Updating $dir to $tag" - ( cd "$dir" && cvs up $datecmd -dAP $uptag ) - else - echo "$progname: Checking out $dir from $tag" - cvs co $datecmd -P $cotag -d "$dir" "$module" - fi - if [ $? != 0 ] ; then - error_modules="$dir $error_modules" - fi -} - -cvs_cmd build lustre-build "$buildtag" - -if [ -f build/buildcvs ] ; then - . build/buildcvs -else - fatal 1 "build/buildcvs does not exist; not updating other modules." 
-fi - -if [ "$error_modules" ] ; then - fatal 1 "There were errors checking out the following directories: $error_modules" -fi diff --git a/snmp/.cvsignore b/snmp/.cvsignore deleted file mode 100644 index 051d1bd50ba9079da9fad7e99b1098a1b6e2d75c..0000000000000000000000000000000000000000 --- a/snmp/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -Makefile -Makefile.in -.deps diff --git a/snmp/Lustre-MIB.txt b/snmp/Lustre-MIB.txt deleted file mode 100644 index db662d19afb58afede5ec64ec79af39f8b312062..0000000000000000000000000000000000000000 --- a/snmp/Lustre-MIB.txt +++ /dev/null @@ -1,966 +0,0 @@ --- --- Lustre Filesystem MIB Module --- - -LUSTRE-MIB DEFINITIONS ::= BEGIN - IMPORTS - MODULE-IDENTITY, OBJECT-TYPE, enterprises, Integer32, - Unsigned32, Counter64 - FROM SNMPv2-SMI - DisplayString, TruthValue, RowStatus - FROM SNMPv2-TC; - -lustreMIB MODULE-IDENTITY - LAST-UPDATED "200505150100Z" - ORGANIZATION "Cluster Filesystems, Inc." - CONTACT-INFO - " info@clusterfs.com - - Postal: Cluster File Systems, Inc. - 110 Capen St - Medford, MA 02155 - USA" - - DESCRIPTION - "Management information for the Lustre parallel filesystem." 
- - ::= { clusterFileSystemsSNMP 1 } - ---============================================================================ --- --- Administrative assignments --- ---============================================================================ - -clusterFileSystems OBJECT IDENTIFIER ::= { enterprises 13140 } -clusterFileSystemsSNMP OBJECT IDENTIFIER ::= { clusterFileSystems 2 } - -lustreMgmtTraps OBJECT IDENTIFIER ::= { lustreMIB 0 } -systemInformation OBJECT IDENTIFIER ::= { lustreMIB 1 } -objectStorageTargets OBJECT IDENTIFIER ::= { lustreMIB 2 } -objectStorageClients OBJECT IDENTIFIER ::= { lustreMIB 3 } -metaDataServers OBJECT IDENTIFIER ::= { lustreMIB 4 } -metaDataClients OBJECT IDENTIFIER ::= { lustreMIB 5 } -lustreClients OBJECT IDENTIFIER ::= { lustreMIB 6 } -logicalObjectVolume OBJECT IDENTIFIER ::= { lustreMIB 7 } -lustreLDLM OBJECT IDENTIFIER ::= { lustreMIB 8 } - - ---============================================================================ --- --- System Information --- ---============================================================================ - -sysVersion OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre version string." - ::= { systemInformation 1 } - -sysKernelVersion OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre kernel version string." - ::= { systemInformation 2 } - -sysHealthCheck OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre kernel health check string. 
Contains 'healthy' or - 'NOT healthy' plus descriptive information on the failure" - ::= { systemInformation 3 } - -sysStatus OBJECT-TYPE - SYNTAX INTEGER { - -- The following two values are states: - -- they may be read, or written - online(1), - offline(2), - -- The following two values are states: - -- they may be read, but not written - onlinePending(3), - offlinePending(4), - -- The following value is an action: - -- this value may be written, but never read. - restart(5) - } - MAX-ACCESS read-write - STATUS current - DESCRIPTION - "The sysStatus variable is used to manage the overall state of the - Lustre components on a system. It has five defined values: - - - 'online', which indicates that all of the configured - Lustre components have been successfully activated; - - - 'offline', which indicates that all of the Lustre - components have been successfully deactivated; - - - 'onlinePending', which indicates one or more failures - occurred in transitioning to the 'online' state; - - - 'offlinePending', which indicates one or more failures - occurred in transitioning to the 'offline' state; - - - 'restart', which is supplied by a management station - wishing to transition first to the 'offline' state, and - then to the 'online' state. - - Only three of these five values may be specified in a - management protocol set operation: 'online', 'offline', - 'restart'. Only four of the five values will be returned in - response to a management protocol retrieval operation: - 'online', 'offline', 'onlinePending', 'offlinePending'." - - ::= { systemInformation 4 } - ---============================================================================ --- --- Object Storage Targets --- ---============================================================================ - -osdNumber OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of Object Storage Devices on a OST system." 
- ::= { objectStorageTargets 1 } - -osdTable OBJECT-TYPE - SYNTAX SEQUENCE OF OsdEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table listing the Object Storage Devices available on a OST system. - The number of entries in this table is available in osdNumber." - ::= { objectStorageTargets 2 } - -osdEntry OBJECT-TYPE - SYNTAX OsdEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Table entry with information an Object Storage Device on a OST - system." - INDEX { osdIndex } - ::= { osdTable 1 } - -OsdEntry ::= - SEQUENCE { - osdIndex Unsigned32, - osdUUID DisplayString, - osdCommonName DisplayString, - osdCapacity Counter64, - osdFreeCapacity Counter64, - osdObjects Counter64, - osdFreeObjects Counter64 - } - -osdIndex OBJECT-TYPE - SYNTAX Unsigned32 (1..2147483647) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Index into the table of Object Storage Devices on a OST system." - ::= { osdEntry 1 } - -osdUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre Universally Unique Identifier (UUID) for the Object - Storage Device." - ::= { osdEntry 2 } - -osdCommonName OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "Its the instance name given by Lustre proc subsystem for - each of the object storage device." - ::= { osdEntry 3 } - -osdCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The capacity of the Object Storage Device in bytes." - ::= { osdEntry 4 } - -osdFreeCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The remaining free capacity of the Object Storage Device in bytes." - ::= { osdEntry 5 } - -osdObjects OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The maximum number of objects that may be stored in an Object - Storage Device." 
- ::= { osdEntry 6 } - -osdFreeObjects OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of unused objects on an Object Storage Device." - ::= { osdEntry 7 } - ---============================================================================ --- --- Object Storage Client --- ---============================================================================ - -oscNumber OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of Object Storage Clients." - ::= { objectStorageClients 1 } - -oscTable OBJECT-TYPE - SYNTAX SEQUENCE OF OscEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table listing the Object Storage Clients available. - The number of entries in this table is available in oscNumber." - ::= { objectStorageClients 2 } - -oscEntry OBJECT-TYPE - SYNTAX OscEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Table entry with information an Object Storage Clients." - INDEX { oscIndex } - ::= { oscTable 1 } - -OscEntry ::= - SEQUENCE { - oscIndex Unsigned32, - oscUUID DisplayString, - oscCommonName DisplayString, - oscOSTServerUUID DisplayString, - oscCapacity Counter64, - oscFreeCapacity Counter64, - oscObjects Counter64, - oscFreeObjects Counter64 - } - -oscIndex OBJECT-TYPE - SYNTAX Unsigned32 (1..2147483647) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Index into the table of Object Storage Clients." - ::= { oscEntry 1 } - -oscUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre Universally Unique Identifier (UUID) for the Object - Storage Device." - ::= { oscEntry 2 } - -oscCommonName OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "Its the instance name given by Lustre proc subsystem for - each of the object storage device." 
- ::= { oscEntry 3 } - -oscOSTServerUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre Object Storage Target UUID accessed by Object Storage Client. - This string also contains a state name, and possibly a DEACTIVATED flag." - ::= { oscEntry 4 } - -oscCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The capacity of the Object Storage Client in bytes." - ::= { oscEntry 5 } - -oscFreeCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The remaining free capacity of the Object Storage Client in bytes." - ::= { oscEntry 6 } - -oscObjects OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The maximum number of objects that may be stored in an Object - Storage Client." - ::= { oscEntry 7 } - -oscFreeObjects OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of unused objects on an Object Storage Client." - ::= { oscEntry 8 } - ---============================================================================ --- --- Metadata Servers --- ---============================================================================ - -mddNumber OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of Metadata Devices on a MDS system." - ::= { metaDataServers 1 } - -mddTable OBJECT-TYPE - SYNTAX SEQUENCE OF MddEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table listing the Meta Data Devices available on a MDS system. - The number of entries in this table is available in mddNumber." - ::= { metaDataServers 2 } - -mddEntry OBJECT-TYPE - SYNTAX MddEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Table entry with information an Meta Data Device on a MDS system." 
- INDEX { mddIndex } - ::= { mddTable 1 } - -MddEntry ::= - SEQUENCE { - mddIndex Unsigned32, - mddUUID DisplayString, - mddCommonName DisplayString, - mddCapacity Counter64, - mddFreeCapacity Counter64, - mddFiles Counter64, - mddFreeFiles Counter64 - } - -mddIndex OBJECT-TYPE - SYNTAX Unsigned32 (1..2147483647) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Index into the table of Meta Data Devices on a MDS system." - ::= { mddEntry 1 } - -mddUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre Universal Unique Identifier (UUID) for the Meta Data - Device." - ::= { mddEntry 2 } - -mddCommonName OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The common name of the Meta Data Device. - Its the instance name given by Lustre proc subsystem for - each of the Meta Data Device." - ::= { mddEntry 3 } - -mddCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The capacity of the Meta Data Device in bytes." - ::= { mddEntry 4 } - -mddFreeCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The remaining free capacity of the Meta Data Device in bytes." - ::= { mddEntry 5 } - -mddFiles OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The maximum number of files that may be stored on a Meta Data - Device." - ::= { mddEntry 6 } - -mddFreeFiles OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of unused files on a Meta Data Device." - ::= { mddEntry 7 } - - ---============================================================================ --- --- Metadata Clients --- ---============================================================================ - -mdcNumber OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of Metadata Clients." 
- ::= { metaDataClients 1 } - -mdcTable OBJECT-TYPE - SYNTAX SEQUENCE OF MdcEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table listing the Meta Data Clients. - The number of entries in this table is available in mdcNumber." - ::= { metaDataClients 2 } - -mdcEntry OBJECT-TYPE - SYNTAX MdcEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Table entry with information an Meta Data Client." - INDEX { mdcIndex } - ::= { mdcTable 1 } - -MdcEntry ::= - SEQUENCE { - mdcIndex Unsigned32, - mdcUUID DisplayString, - mdcCommonName DisplayString, - mdcMDSServerUUID DisplayString, - mdcCapacity Counter64, - mdcFreeCapacity Counter64, - mdcFiles Counter64, - mdcFreeFiles Counter64 - } - -mdcIndex OBJECT-TYPE - SYNTAX Unsigned32 (1..2147483647) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Index into the table of Meta Data Client." - ::= { mdcEntry 1 } - -mdcUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre Universal Unique Identifier (UUID) for the Meta Data Client." - ::= { mdcEntry 2 } - -mdcCommonName OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The common name of the Meta Data Client. - Its the instance name given by Lustre proc subsystem for - each of the Meta Data Client." - ::= { mdcEntry 3 } - -mdcMDSServerUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre Meta data server UUID accessed by Metadata Client. - This string also contains a state name, and possibly a DEACTIVATED flag." - ::= { mdcEntry 4 } - -mdcCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The capacity of the Meta Data Client in bytes." - ::= { mdcEntry 5 } - -mdcFreeCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The remaining free capacity of the Meta Data Client in bytes." 
- ::= { mdcEntry 6 } - -mdcFiles OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The maximum number of files that may be stored on a Meta Data Client." - ::= { mdcEntry 7 } - -mdcFreeFiles OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of unused files on a Meta Data Client." - ::= { mdcEntry 8 } - ---============================================================================ --- --- Lustre Clients --- ---============================================================================ - -cliMountNumber OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of Lustre mounts that are currently on a client system." - ::= { lustreClients 1 } - -cliMountTable OBJECT-TYPE - SYNTAX SEQUENCE OF CliMountEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table listing the Lustre file system mount points and their - configurations. The current number of entries is specified by - cliMountNumber." - ::= { lustreClients 2 } - -cliMountEntry OBJECT-TYPE - SYNTAX CliMountEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Information about a single Lustre file system mount point." - INDEX { cliIndex } - ::= { cliMountTable 1 } - -CliMountEntry ::= - SEQUENCE { - cliIndex Unsigned32, - cliUUID DisplayString, - cliCommonName DisplayString, - cliMDSUUID DisplayString, - cliMDSCommonName DisplayString, - cliUsesLOV TruthValue, - cliLOVUUID DisplayString, - cliLOVCommonName DisplayString - } - -cliIndex OBJECT-TYPE - SYNTAX Unsigned32 (1..2147483647) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Index into the table of Lustre file system mount points on a - client system." - ::= { cliMountEntry 1 } - -cliUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre Universal Unique Identifier (UUID) for a Lustre - file system mount point." 
- ::= { cliMountEntry 2 } - -cliCommonName OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "Its the instance name given by Lustre proc subsystem for - each of the Lustre client mount point." - ::= { cliMountEntry 3 } - -cliMDCUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The UUID of the Metadata Client to which a Lustre file system - mount point is connected." - ::= { cliMountEntry 4 } - -cliMDCCommonName OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "This is the name of the Metadata Client to which a Lustre client - file system mount point is connected." - ::= { cliMountEntry 5 } - -cliUsesLOV OBJECT-TYPE - SYNTAX TruthValue - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "This variable is true(1) if a Lustre file system mount point - is using a Logical Object volume (LOV), and false(2) otherwise." - ::= { cliMountEntry 6 } - -cliLOVUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The UUID of the LOV to which a Lustre file system - mount point is connected." - ::= { cliMountEntry 7 } - -cliLOVCommonName OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "This is the name of the LOV to which a Lustre client - file system mount point is connected." - ::= { cliMountEntry 8 } - - ---============================================================================ --- --- Logical Object Volume --- ---============================================================================ - -lovNumber OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of LOV instances." - ::= { logicalObjectVolume 1 } - -lovTable OBJECT-TYPE - SYNTAX SEQUENCE OF LovEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table listing the statistics for LOV instances available on - a Lustre configured node." 
- ::= { logicalObjectVolume 2 } - -lovEntry OBJECT-TYPE - SYNTAX LovEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Table entry with information on LOV stats for all LOV instances - available on a Lustre configured node." - INDEX { lovIndex } - ::= { lovTable 1 } - -LovEntry ::= - SEQUENCE { - lovIndex Unsigned32, - lovUUID DisplayString, - lovCommonName DisplayString, - lovNumOBD Unsigned32, - lovNumActiveOBD Unsigned32, - lovCapacity Counter64, - lovFreeCapacity Counter64, - lovFiles Counter64, - lovFreeFiles Counter64, - lovStripeCount Unsigned32, - lovStripeOffset Unsigned32, - lovStripeSize Unsigned32, - lovStripeType Unsigned32 - } - -lovIndex OBJECT-TYPE - SYNTAX Unsigned32 (1..2147483647) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Index into the table of LOV stats for LOV instances available - on a Lustre configured system." - ::= { lovEntry 1 } - -lovUUID OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The Lustre Universally Unique Identifier (UUID) for the LOV." - ::= { lovEntry 2 } - -lovCommonName OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "Its the instance name given by Lustre proc subsystem for - each of the LOVs." - ::= { lovEntry 3 } - -lovNumOBD OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of OBDs." - ::= { lovEntry 4 } - - -lovNumActiveOBD OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The name of the target OBDs." - ::= { lovEntry 5 } - - -lovCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The capacity of the LOV in bytes." - ::= { lovEntry 6 } - -lovFreeCapacity OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The remaining free capacity of the LOV in bytes." 
- ::= { lovEntry 7 } - -lovFiles OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The maximum number of files that may be stored on a LOV." - ::= { lovEntry 8 } - -lovFreeFiles OBJECT-TYPE - SYNTAX Counter64 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of unused files on a LOV." - ::= { lovEntry 9 } - -lovStripeCount OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of stripes on a LOV." - ::= { lovEntry 10 } - -lovStripeOffset OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The stripe offset on a LOV." - ::= { lovEntry 11 } - -lovStripeSize OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The stripe size on a LOV." - ::= { lovEntry 12 } - -lovStripeType OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The stripe type on a LOV." - ::= { lovEntry 13 } - ---============================================================================ --- --- Lustre Distributed Lock Manager --- ---============================================================================ - -ldlmNumber OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The number of LDLM namespaces (Also its the number of instances - in a LDLM table)" - ::= { lustreLDLM 1 } - -ldlmTable OBJECT-TYPE - SYNTAX SEQUENCE OF LdlmEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "A table listing the statistics for LDLM namespaces available on - a Lustre configured node." - ::= { lustreLDLM 2 } - -ldlmEntry OBJECT-TYPE - SYNTAX LdlmEntry - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Table entry with information LDLM stats for all LDLM namespaces - available on a Lustre configured node." 
- INDEX { ldlmIndex } - ::= { ldlmTable 1 } - -LdlmEntry ::= - SEQUENCE { - ldlmIndex Unsigned32, - ldlmNameSpace DisplayString, - ldlmLockCount Unsigned32, - ldlmUnusedLockCount Unsigned32, - ldlmResourceCount Unsigned32 - } - -ldlmIndex OBJECT-TYPE - SYNTAX Unsigned32 (1..2147483647) - MAX-ACCESS not-accessible - STATUS current - DESCRIPTION - "Index into the table of LDLM stats for LDLM namespaces available - on a Lustre configured system." - ::= { ldlmEntry 1 } - -ldlmNameSpace OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The name of the particular LDLM namespace." - ::= { ldlmEntry 2 } - -ldlmLockCount OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "Active Lock count of particular LDLM namespace." - ::= { ldlmEntry 3 } - -ldlmUnusedLockCount OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "Unused Lock count of particular LDLM namespace." - ::= { ldlmEntry 4 } - -ldlmResourceCount OBJECT-TYPE - SYNTAX Unsigned32 - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "Resource count of particular LDLM namespace." - ::= { ldlmEntry 5 } - ---============================================================================ --- --- Lustre Management Traps --- ---============================================================================ - - -lustrePortalsCatastropeTrap NOTIFICATION-TYPE - OBJECTS { - lustrePortalsCatastropeReasonString - } - STATUS current - DESCRIPTION - "The Lustre service Failure traps sent to management station in - case of Portals Catastrophe." - ::= { lustreMgmtTraps 1 } - -lustrePortalsCatastropeReasonString OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The portals catastrophe description string." 
- ::= { lustreMgmtTraps 2 } - -lustreOBDUnhealthyTrap NOTIFICATION-TYPE - OBJECTS { - lustreOBDNameString, - lustreOBDUnhealthyReasonString - } - STATUS current - DESCRIPTION - "The Lustre service Failure traps sent to management station in - on OBD health check failure." - ::= { lustreMgmtTraps 3 } - -lustreOBDNameString OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The health status string." - ::= { lustreMgmtTraps 4 } - -lustreOBDUnhealthyReasonString OBJECT-TYPE - SYNTAX DisplayString - MAX-ACCESS read-only - STATUS current - DESCRIPTION - "The health status string." - ::= { lustreMgmtTraps 5 } - -END diff --git a/snmp/Makefile.am b/snmp/Makefile.am deleted file mode 100644 index 40658ab995e9104cafc4612f838dfcaf90093b64..0000000000000000000000000000000000000000 --- a/snmp/Makefile.am +++ /dev/null @@ -1,19 +0,0 @@ -agent_PROGRAMS := lustresnmp.so -mib_DATA := Lustre-MIB.txt - -lustresnmp_so_SOURCES := \ - lustre-snmp.c \ - lustre-snmp.h \ - lustre-snmp-trap.c \ - lustre-snmp-trap.h \ - lustre-snmp-util.c \ - lustre-snmp-util.h - -lustresnmp_so_LDADD := $(NET_SNMP_LIBS) -lustresnmp_so_CFLAGS := -fPIC $(NET_SNMP_CFLAGS) -lustresnmp_so_LDFLAGS := -fPIC -shared - -SUBDIRS := -DIST_SUBDIRS := autoconf - -EXTRA_DIST := $(mib_DATA) diff --git a/snmp/README.install b/snmp/README.install deleted file mode 100644 index 652c4d50eeb70120bef1e2a7408f5258ab17ac08..0000000000000000000000000000000000000000 --- a/snmp/README.install +++ /dev/null @@ -1,9 +0,0 @@ -Steps for installation: - -Update /etc/snmp/snmpd.conf appending the following line: - -dlmod lustresnmp /usr/lib/lustre/snmp/lustresnmp.so - -Or the appropriate path to the snmp module. - -Then, restart the snmpd daemon. 
diff --git a/snmp/autoconf/.cvsignore b/snmp/autoconf/.cvsignore deleted file mode 100644 index 282522db0342d8750454b3dc162493b5fc709cc8..0000000000000000000000000000000000000000 --- a/snmp/autoconf/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/snmp/autoconf/Makefile.am b/snmp/autoconf/Makefile.am deleted file mode 100644 index 2bb7693133a84d0a874256d6b6c7774168674428..0000000000000000000000000000000000000000 --- a/snmp/autoconf/Makefile.am +++ /dev/null @@ -1 +0,0 @@ -EXTRA_DIST := lustre-snmp.m4 diff --git a/snmp/autoconf/lustre-snmp.m4 b/snmp/autoconf/lustre-snmp.m4 deleted file mode 100644 index f34a2d559a22cfd0cbceff417c6e9f592bca549d..0000000000000000000000000000000000000000 --- a/snmp/autoconf/lustre-snmp.m4 +++ /dev/null @@ -1,69 +0,0 @@ -# -# LS_CONFIGURE -# -# configure bits for lustre-snmp -# -AC_DEFUN([LS_CONFIGURE], -[AC_MSG_CHECKING([whether to try to build SNMP support]) -AC_ARG_ENABLE([snmp], - AC_HELP_STRING([--enable-snmp], - [require SNMP support (default=auto)]), - [],[enable_snmp='auto']) -AC_MSG_RESULT([$enable_snmp]) - -if test x$enable_snmp != xno ; then - AC_CHECK_PROG([NET_SNMP_CONFIG], [net-snmp-config], [net-snmp-config]) - if test "$NET_SNMP_CONFIG" ; then - NET_SNMP_CFLAGS=$($NET_SNMP_CONFIG --base-cflags) - NET_SNMP_LIBS=$($NET_SNMP_CONFIG --agent-libs) - - CPPFLAGS_save="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $NET_SNMP_CFLAGS" - - LIBS_save="$LIBS" - LIBS="$LIBS $NET_SNMP_LIBS" - - AC_CHECK_HEADER([net-snmp/net-snmp-config.h],[ - AC_CHECK_FUNC([register_mib],[SNMP_SUBDIR="snmp"],[ - LIBS="$LIBS -lwrap" - NET_SNMP_LISB="$NET_SNMP_LIBS -lwrap" - # fail autoconf's cache - unset ac_cv_func_register_mib - AC_CHECK_FUNC([register_mib],[SNMP_SUBDIR="snmp"]) - ]) - ]) - - LIBS="$LIBS_save" - CPPFLAGS="$CPPFLAGS_save" - fi - AC_MSG_CHECKING([for SNMP support]) - if test "$SNMP_SUBDIR" ; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no (see config.log for errors)]) - if test x$enable_snmp = xyes ; then - 
AC_MSG_ERROR([SNMP support was requested, but unavailable]) - fi - fi -fi - -agentdir='${pkglibdir}/snmp' -mibdir='${pkgdatadir}/snmp/mibs' - -AC_SUBST(NET_SNMP_CFLAGS) -AC_SUBST(NET_SNMP_LIBS) -AC_SUBST(agentdir) -AC_SUBST(mibdir) -]) - -# -# LS_CONFIG_FILE -# -# files that should be generated with AC_OUTPUT -# -AC_DEFUN([LS_CONFIG_FILES], -[AC_CONFIG_FILES([ -snmp/Makefile -snmp/autoconf/Makefile -]) -]) diff --git a/snmp/lustre-snmp-trap.c b/snmp/lustre-snmp-trap.c deleted file mode 100644 index 7caf5cea1a753d75acd7fde654d0c211a7c7763c..0000000000000000000000000000000000000000 --- a/snmp/lustre-snmp-trap.c +++ /dev/null @@ -1,532 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2005 Cluster File Systems, Inc. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -/* - * include important headers - */ - -#include <net-snmp/net-snmp-config.h> -#include <net-snmp/net-snmp-includes.h> -#include <net-snmp/agent/net-snmp-agent-includes.h> - -/* - * include our .h file - */ - -#include <sys/types.h> -#include <sys/vfs.h> -#include <dirent.h> -#include <sys/stat.h> -#include <unistd.h> -#include <stdio.h> -#include <stdarg.h> -#include "lustre-snmp-util.h" - -/************************************************************************** - * Constants - *************************************************************************/ - -#define DEFAULT_POLL_INTERVAL_SECONDS 60 -#define POLL_INTERVAL_ENV_VAR "LSNMP_POLL_INTERVAL" -#define SNMP_HEALTH_CHECK_TEST_FILE "LSNMP_HEALTH_CHECK_TEST_FILE" - -/************************************************************************** - * Trap OIDS - *************************************************************************/ - -static oid objid_snmptrap[] = - { 1,3,6,1,6,3,1,1,4,1,0}; -static oid lustre_portals_trap[] = - { 1,3,6,1,4,1,13140,2,1,0,1}; -static oid lustre_portals_trap_string[]= - { 1,3,6,1,4,1,13140,2,1,0,2}; -static oid lustre_unhealthy_trap[] = - { 1,3,6,1,4,1,13140,2,1,0,3}; -static oid lustre_unhealthy_trap_device_name_string[]= - { 1,3,6,1,4,1,13140,2,1,0,4}; -static oid lustre_unhealthy_trap_reason_string[]= - { 1,3,6,1,4,1,13140,2,1,0,5}; - -/************************************************************************** - * Data structures - *************************************************************************/ - -typedef struct obd_unhealthy_entry_struct{ - - /*1-if seen as part of the the is_unhealthy scan, otherwise 0*/ - int seen; - - /*single linked list pointer*/ - struct obd_unhealthy_entry_struct *next; - - /*obdname - variable size*/ - char name[0]; - -}obd_unhealthy_entry; - -/************************************************************************** - * Local functions - *************************************************************************/ - -int 
get_poll_interval_seconds(); -void health_poll_worker(unsigned int registration_number, void *clientarg); -void send_portals_catastrophe_trap(char *reason_string); -void send_obd_unhealthy_trap(char *obd_name,char *reason_string); -int is_obd_newly_unhealthy(const char* obd_name); -void obd_unhealthy_scan(void); -void health_entry_parser(void); - -/************************************************************************** - * Global variables - *************************************************************************/ - -static int g_sent_portals_catastrophe = 0; -static obd_unhealthy_entry* g_obd_unhealthy_list = NULL; -static int g_poll_interval_seconds; -static unsigned int g_registration_handle; -static char *g_health_check_test_file = 0; - -/***************************************************************************** - * Function: initilize_trap_handler - * - * Description: Initlized the trap poll haalder. - * - * Input: void - * - * Output: Global g_poll_interval_seconds is set. - * - ****************************************************************************/ - -void initilize_trap_handler(void) -{ - g_poll_interval_seconds = get_poll_interval_seconds(); - - g_registration_handle = snmp_alarm_register(g_poll_interval_seconds, 0, health_poll_worker, NULL); - if (g_registration_handle == 0) - report("%s %s: line %d %s", __FILE__, __FUNCTION__, __LINE__, - "snmp_alarm_register failed"); - - DEBUGMSGTL(("lsnmpd","lsnmp alarm registered poll interval = %d seconds\n",g_poll_interval_seconds)); - - g_health_check_test_file = getenv(SNMP_HEALTH_CHECK_TEST_FILE); - if(g_health_check_test_file != 0) - DEBUGMSGTL(("lsnmpd","lsnmp health check test file set to \'%s\'\n",g_health_check_test_file)); -} - -/***************************************************************************** - * Function: terminate_trap_handler - * - * Description: Terminate the trap poll haalder. - * - * Input: void - * - * Output: Global g_poll_interval_seconds is set. 
- * - ****************************************************************************/ - -void terminate_trap_handler(void) -{ - snmp_alarm_unregister(g_registration_handle); -} - -/***************************************************************************** - * Function: get_poll_interval_seconds - * - * Description: This function used to get the poll period for timer, which - * is used to read throughput values periodically. - * Input: void - * Output: Alarm period, default value(if env var not set) otherwise. - ****************************************************************************/ - -int get_poll_interval_seconds() -{ - char *alarm_period; - int ret_val = DEFAULT_POLL_INTERVAL_SECONDS; - - /* Get Alarm period for reading the Lustre client table. */ - - alarm_period = getenv(POLL_INTERVAL_ENV_VAR); - if (alarm_period != NULL) { - char *ptr = alarm_period; - while(isdigit(*ptr)) ptr++; - - /* if we have only digits then conver it*/ - if (*ptr == '\0') { - int time = atoi(alarm_period); - if (time > 0) - ret_val = time; /* Alarm period in seconds */ - } - } - return ret_val; -} - -/***************************************************************************** - * Function: health_poll_worker - * - * Description: This is the routine registered to system timer for updating - * the throughput values for all the clients and its respective osc(s). - * - * Input: 'registration_number` value obtained during the alarm registration - * 'clientarg' pointing to user defined data type. 
- * Output: void - *****************************************************************************/ - -void health_poll_worker(unsigned int registration_number, void *clientarg) -{ - health_entry_parser(); - - /* Register the function again to call after lustre_alarm_period */ - if (!snmp_alarm_register(g_poll_interval_seconds, 0, health_poll_worker, NULL)) { - report("%s %s:line %d %s", __FILE__, __FUNCTION__, __LINE__, - "snmp_alarm_register failed"); - } -} - -/***************************************************************************** - * Function: health_entry_parser - * - * Description: This routine is called to parse the health_check entry - * and send traps - * Input: 'None - * Output: void - *****************************************************************************/ - - void health_entry_parser(void) -{ - FILE *fptr = NULL; - char string[MAX_LINE_SIZE]; - int b_seen_portals_catastrophe = 0; - const char *filename = g_health_check_test_file == 0 ? - LUSTRE_PATH FILENAME_SYSHEALTHCHECK : - g_health_check_test_file; - - /*DEBUGMSGTL(("lsnmpd","health_entry_parser(%s)\n",filename));*/ - - /* Open the file. Use the test file env variable if - there is one */ - fptr = fopen(filename,"r"); - - /* If the path is not found do nothing */ - if( NULL == fptr) - return; - - while( NULL != fgets(string, sizeof(string), fptr)){ - - /*DEBUGMSGTL(("lsnmpd","health_entry_parser() looking at = \'%s\'\n",string));*/ - - /* - * First handle the portals catastrophe - * Look for the string "LBUG" - */ - if(0 == strncmp(string,"LBUG",4)){ - /* - * If we haven't sent the catastrophe message yet - * send it now. 
And keep track that we've sent it - */ - if(!g_sent_portals_catastrophe){ - send_portals_catastrophe_trap("LBUG"); - g_sent_portals_catastrophe = 1; - } - b_seen_portals_catastrophe = 1; - } - - /* - * Now handle any of the OBD object failures - * look for "device <OBDNAME> reported unhealthy" - */ - else if(0 == strncmp(string,"device ",7)){ - char *obd_name = string+7; - char *space_after_obd_name; - - /* - * Now find the space after the obd name - * Again if there is no space we're in trouble - */ - space_after_obd_name = strchr(obd_name,' '); - if(space_after_obd_name == 0) - break; - - /* - * Null terminate the obd_name - */ - *space_after_obd_name = 0; - - DEBUGMSGTL(("lsnmpd","Looking at obd=%s\n",obd_name)); - - /* - * If we haven't sent a trap for this one - * then send it now - */ - if(is_obd_newly_unhealthy(obd_name)) - send_obd_unhealthy_trap(obd_name,"unhealthy"); - } - } - - /* If we don't find it reset the catastrope flag*/ - if(!b_seen_portals_catastrophe && g_sent_portals_catastrophe) - { - DEBUGMSGTL(("lsnmpd","LBUG has been cleared\n")); - g_sent_portals_catastrophe = 0; - } - - /* - * Any <OBDNAMES> that weren't queried above are now unhealthy. - * Scan through and cleanup the newly healthy obds - */ - obd_unhealthy_scan(); - - fclose(fptr); -} - -/***************************************************************************** - * Function: send_portals_catastrophe_trap - * - * Description: Send the SNMP V2 trap - * - * Input: 'reason_string' the reason for the catastrope. - - * Output: none - *****************************************************************************/ - -void send_portals_catastrophe_trap(char *reason_string) -{ - /* - * Setup the trap variables. - * It's a linked list of netsnmp_variable_list items. - */ - netsnmp_variable_list var_trap[2]; - - DEBUGMSGTL(("lsnmpd","Sending portals catastrophe trap reason=%s\n",reason_string)); - - /* - * Setup the first variable in the trap data. - * Have it chain to another variable. 
- */ - var_trap[0].next_variable = &var_trap[1]; - - /*The "name" must be the standard snmp "trap" OID.*/ - var_trap[0].name = objid_snmptrap; - var_trap[0].name_length = sizeof(objid_snmptrap) / sizeof(oid); - - /*But the data contained in this variable, is an OID that is the trap OID.*/ - var_trap[0].type = ASN_OBJECT_ID; - var_trap[0].val.objid = lustre_portals_trap; - var_trap[0].val_len = sizeof(lustre_portals_trap); - - /* - * Setup the second variable in the trap data. - * It is the last in the chain so set next to NULL - */ - var_trap[1].next_variable = NULL; - - /*The "name" is the OID of the portals trap reason strong*/ - var_trap[1].name = lustre_portals_trap_string; - var_trap[1].name_length = sizeof(lustre_portals_trap_string) / sizeof(oid); - - /*And the data is a octet string, that contains the actually reason string*/ - var_trap[1].type = ASN_OCTET_STR; - var_trap[1].val.string = reason_string; - var_trap[1].val_len = strlen(reason_string); - - /*And now send off the trap*/ - send_v2trap(var_trap); -} - - -/***************************************************************************** - * Function: send_obd_unhealthy_trap - * - * Description: Send the SNMP V2 trap - * - * Input: 'obd_name' the name of the obd - * 'reason_string' the reason for the catastrope. - * Output: none - *****************************************************************************/ - -void send_obd_unhealthy_trap(char *obd_name,char *reason_string) -{ - /* - * Setup the trap variables. - * It's a linked list of netsnmp_variable_list items. - */ - netsnmp_variable_list var_trap[3]; - - DEBUGMSGTL(("lsnmpd","Sending OBD unhealthy trap obd=%s reason=%s\n",obd_name,reason_string)); - - /* - * Setup the first variable in the trap data. - * Have it chain to another variable. 
- */ - var_trap[0].next_variable = &var_trap[1]; - - /*The "name" must be the standard snmp "trap" OID.*/ - var_trap[0].name = objid_snmptrap; - var_trap[0].name_length = sizeof(objid_snmptrap) / sizeof(oid); - - /*But the data contained in this variable, is an OID that is the trap OID.*/ - var_trap[0].type = ASN_OBJECT_ID; - var_trap[0].val.objid = lustre_unhealthy_trap; - var_trap[0].val_len = sizeof(lustre_unhealthy_trap); - - /* - * Setup the second variable in the trap data. - * Have it chain to another variable. - */ - var_trap[1].next_variable = &var_trap[2];; - - /*The "name" is the OID of the portals trap reason strong*/ - var_trap[1].name = lustre_unhealthy_trap_device_name_string; - var_trap[1].name_length = sizeof(lustre_unhealthy_trap_device_name_string) / sizeof(oid); - - /*And the data is a octet string, that contains the actually reason strong*/ - var_trap[1].type = ASN_OCTET_STR; - var_trap[1].val.string = obd_name; - var_trap[1].val_len = strlen(obd_name); - - /* - * Setup the third variable in the trap data. - * It is the last in the chain so set next to NULL - */ - var_trap[2].next_variable = NULL; - - /*The "name" is the OID of the portals trap reason strong*/ - var_trap[2].name = lustre_unhealthy_trap_reason_string; - var_trap[2].name_length = sizeof(lustre_unhealthy_trap_reason_string) / sizeof(oid); - - /*And the data is a octet string, that contains the actually reason strong*/ - var_trap[2].type = ASN_OCTET_STR; - var_trap[2].val.string = reason_string; - var_trap[2].val_len = strlen(reason_string); - - /*And now send off the trap*/ - send_v2trap(var_trap); -} - - -/***************************************************************************** - * Function: is_obd_newly_unhealthy - * - * Description: Deterime if the obd is going from health->unhealth - * Also mark all unhealhy (new and old) as seen. 
- * - * Input: 'obd_name' the name of the obd - * - * Output: 1 if newly unhealthy 0 if previolsy unhealthy - *****************************************************************************/ - -int is_obd_newly_unhealthy(const char* obd_name) -{ - /*for all elements in g_obd_unhealthy_list*/ - obd_unhealthy_entry* walker; - obd_unhealthy_entry* entry; - int name_len; - - for(walker = g_obd_unhealthy_list; walker != 0; walker = walker->next) - { - /*If the names match*/ - if(0 == strcmp (walker->name,obd_name)) - { - /* Commented out because it was just to noisy! - * DEBUGMSGTL(("lsnmpd","obd %s was already unhealthy\n",obd_name)); - */ - - /*Mark the entry as seen, and return that it was previously unhealthy*/ - walker->seen =1; - return 0; - } - } - - DEBUGMSGTL(("lsnmpd","obd %s is now unhealthy\n",obd_name)); - - /*We didn't find an entry so we need to create a new one. */ - /*Calculate the obd_name length*/ - name_len = strlen(obd_name)+1; - - /*Allocate a new entry*/ - entry = malloc(sizeof(*entry) + name_len); - - /*Put this element at the front of the list*/ - entry->next = g_obd_unhealthy_list; - g_obd_unhealthy_list = entry; - - /*Mark it initially as seen*/ - entry->seen = 1; - - /*And copy the entry name*/ - memcpy(entry->name,obd_name,name_len); - - /*return this obd as newly unhealthy.*/ - return 1; -} - - -/***************************************************************************** - * Function: obd_unhealthy_scan - * - * Description: Deterime if any obd is going from unhealthy->healthy - * Any of the obds that weren't "seen" by the - * is_obd_newly_unhealthy() pass are now health so - * remove them from the lists - * Also clear all "seen" flags. 
- * - * Input: None - * Output: None - *****************************************************************************/ - -void obd_unhealthy_scan(void) -{ - /*fore all elements in g_obd_unhealthy_list*/ - obd_unhealthy_entry* walker = g_obd_unhealthy_list; - obd_unhealthy_entry* prev = 0; - while(walker != 0) - { - /*remove any that was not seen as unhealthy the last time*/ - if(walker->seen == 0) - { - /*Remove element from the list, but first fix up the walker pointer*/ - obd_unhealthy_entry* temp = walker; - - DEBUGMSGTL(("lsnmpd","obd %s is now healthy\n",walker->name)); - - walker = walker->next; - - /*Now adjust the pointers to effectively remove this entry*/ - if(prev == 0) - g_obd_unhealthy_list = walker; - else - prev->next = walker; - - /*And free the pointer. */ - free(temp); - /*walker and prev are correctly setup so we can go around the loop again.*/ - } - - /*Mark all other entries as NOT seen for next pass through*/ - else - { - walker->seen = 0; - /*Go onto the next entry*/ - prev = walker; - walker = walker->next; - } - } -} diff --git a/snmp/lustre-snmp-trap.h b/snmp/lustre-snmp-trap.h deleted file mode 100644 index ce823244b642e6ee1537a8629b0d7fbf214bbbac..0000000000000000000000000000000000000000 --- a/snmp/lustre-snmp-trap.h +++ /dev/null @@ -1,29 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2005 Cluster File Systems, Inc. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef LUSTRE_SNMP_TRAP_H -#define LUSTRE_SNMP_TRAP_H - -extern void initilize_trap_handler(void); -extern void terminate_trap_handler(void); - -#endif diff --git a/snmp/lustre-snmp-util.c b/snmp/lustre-snmp-util.c deleted file mode 100644 index 8048f2d4d6176ae759e0832a29cb050f88a47b76..0000000000000000000000000000000000000000 --- a/snmp/lustre-snmp-util.c +++ /dev/null @@ -1,652 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2005 Cluster File Systems, Inc. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -/* - * include important headers - */ - -#include <net-snmp/net-snmp-config.h> -#include <net-snmp/net-snmp-includes.h> -#include <net-snmp/agent/net-snmp-agent-includes.h> - -/* - * include our .h file - */ - -#include <sys/types.h> -#include <sys/vfs.h> -#include <dirent.h> -#include <sys/stat.h> -#include <unistd.h> -#include <stdio.h> -#include <stdarg.h> -#include "lustre-snmp-util.h" - -/********************************************************************* - * Function: get_file_list - * - * Description: For the given valid directory path, returns the list - * all directories or files in that path. - * - * Input: 'dirname' the directory path. - * 'file_type' if this takes the value DIR_TYPE then - * returns the list of directories in that path. - * If its of type FILE_TYPE then returns the list of files - * in that path. - * 'count' pointer to number of elements returned in the - * return string. - * - * Output: List of directories/files in that path. - * - *********************************************************************/ - -char *get_file_list(const char *dirname, int file_type, uint32_t *count) -{ - - DIR *pdir = NULL; - struct dirent *pdirent = NULL; - int curr_offset = 0; - int byte_count = 0; - int file_count = 0; - char *ret_str = NULL; - char filename[MAX_PATH_SIZE]; - int cond1, cond2; - - if ((dirname == NULL) || ((pdir = opendir(dirname)) == NULL )) { - if (dirname == NULL) { - report("%s %s:line %d %s", __FILE__, __FUNCTION__, __LINE__, - "NULL directory is passed as parameter to funtion"); - } else { - report("%s %s:line %d Error in opening the dir %s", __FILE__, - __FUNCTION__, __LINE__, dirname); - } - if (count) - *count = 0; - return NULL; - } - - while (1) { - if ((pdirent = readdir(pdir)) == NULL) - break; - - /* Skip over '.' and '..' 
directores */ - if ((pdirent->d_name[0] == '.') || - !strcmp(pdirent->d_name, FILENAME_NUM_REF)) - continue; - - sprintf(filename, "%s/%s", dirname, pdirent->d_name); - cond1 = (file_type == FILE_TYPE) && is_directory(filename); - cond2 = (file_type == DIR_TYPE) && (!is_directory(filename)); - - if (cond1 || cond2) - continue; - - /* Calculate the number of bytes for this new entry.*/ - byte_count += strlen(pdirent->d_name) + 1; - file_count++; - } - if (count) - *count = file_count; - - if (file_count != 0) { - - /* need one extra one for the finall NULL terminator*/ - if ((ret_str = (char *) malloc(byte_count + 1)) == NULL) { - report("get_file_list() failed to malloc(%d)",byte_count+1); - closedir(pdir); - return NULL; - } - - rewinddir(pdir); - - while (file_count != 0) { - if ((pdirent = readdir(pdir)) == NULL) - break; - - if ((pdirent->d_name[0] == '.') || - !strcmp(pdirent->d_name, FILENAME_NUM_REF)) - continue; - - sprintf(filename, "%s/%s", dirname, pdirent->d_name); - cond1 = (file_type == FILE_TYPE) && is_directory(filename); - cond2 = (file_type == DIR_TYPE) && (!is_directory(filename)); - - if (cond1 || cond2) - continue; - - strcpy(ret_str + curr_offset, pdirent->d_name); - curr_offset = curr_offset + strlen(pdirent->d_name) + 1; - file_count--; - } - /* Put in the finall null terminator*/ - ret_str[byte_count] = '\0'; - } - closedir(pdir); - return ret_str; -} - - -/********************************************************************* - * Function: is_directory - * - * Description: Checks if given filename is a directory or not. - * all directories or files in that path. - * - * Input: 'filename' the directory path to be checked. - * - * Output: Returns 1 if its a directory else 0. 
- * - *********************************************************************/ - -int is_directory(const char *filename) -{ - - struct stat statf; - int result; - - result = stat(filename, &statf); - return ((result == SUCCESS) && (statf.st_mode & S_IFDIR)); -} - -/********************************************************************* - * Function: read_string - * - * Description: For the given valid file path, reads the data in - * that file. - * - * Input: 'filepath' the file whose data is to be accessed. - * 'lustre_var' the data from the file is read into - * this variable, returned to the requestor. - * 'var_max_size' the max size of the string - * 'report_error' boolean if error should be reported on - * missing filepath - * - * Output: Returns SUCCESS if read successfully from file else - * returns ERROR. - *********************************************************************/ - -int read_string(const char *filepath, char *lustre_var, size_t var_max_size) -{ - FILE *fptr = NULL; - int len = 0; - int ret_val = SUCCESS; - int report_error = 1; - - if ((filepath == NULL) || (lustre_var == NULL)) { - report("%s %s:line %d %s", __FILE__, __FUNCTION__, __LINE__, - "Input parameter is NULL"); - ret_val = ERROR; - } else { - fptr = fopen(filepath, "r"); - - if (fptr == NULL) { - if(report_error) - report("%s %s:line %d Unable to open the file %s", __FILE__, - __FUNCTION__, __LINE__, filepath); - ret_val = ERROR; - } else { - if (fgets(lustre_var, var_max_size, fptr) == NULL) { - report("%s %s:line %d read failed for file %s", __FILE__, - __FUNCTION__, __LINE__, filepath); - ret_val = ERROR; - } else { - len = strlen(lustre_var); - /* - Last char is EOF, before string ends, - so '\0' is moved to last but one. 
- */ - lustre_var[len-1] = lustre_var[len]; - } - fclose(fptr); - } - } - return ret_val; -} - -/************************************************************************** - * Function: lustrefs_ctrl - * - * Description: Execute /etc/init.d/lustre script for starting, - * stopping and restarting Lustre services in child process. - * - * Input: Start/Stop/Restart Command Number. - * Output: Returns void - * - **************************************************************************/ - -void lustrefs_ctrl(int command) -{ - char *cmd[3]; - - cmd[0] = LUSTRE_SERVICE; - switch (command) { - case ONLINE: - cmd[1] = "start"; - break; - case OFFLINE: - cmd[1] = "stop"; - break; - case RESTART: - cmd[1] = "restart"; - break; - default: - return; - } - - cmd[2] = (char *)0; - - if (fork() == 0) { - execvp(cmd[0], cmd); - report("failed to execvp(\'%s %s\')",cmd[0],cmd[1]); - } - return; -} - -/***************************************************************************** - * Function: get_sysstatus - * - * Description: Read /var/lustre/sysStatus file, and based on file contents - * return the status of Lustre services. - * - * Input: void - * Output: Return ONLINE/OFFLINE/ONLINE PENDING/OFFLINE PENDING status - * values. 
- * - ****************************************************************************/ - -int get_sysstatus(void) -{ - FILE *fptr = NULL; - int len = 0; - int ret_val = ERROR ; - char sys_status[50] = {0}; - - if(SUCCESS == read_string(FILENAME_SYS_STATUS,sys_status,sizeof(sys_status))) - { - if (memcmp(sys_status, STR_ONLINE_PENDING,strlen(STR_ONLINE_PENDING)) == 0) - ret_val = ONLINE_PENDING; - else if (memcmp(sys_status, STR_ONLINE, strlen(STR_ONLINE)) == 0) - ret_val = ONLINE; - else if (memcmp(sys_status, STR_OFFLINE_PENDING,strlen(STR_OFFLINE_PENDING)) == 0) - ret_val = OFFLINE_PENDING; - else if (memcmp(sys_status, STR_OFFLINE, strlen(STR_OFFLINE)) == 0) - ret_val = OFFLINE; - else - report("%s %s:line %d Bad Contents in file %s \'%s\'", __FILE__, - __FUNCTION__, __LINE__, FILENAME_SYS_STATUS,sys_status); - } - return ret_val; -} - - -/***************************************************************************** - * Function: read_ulong - * - * Description: Read long values from lproc and copy to the location - * pointed by input parameter. - * - * Input: file path, and pointer for data to be copied - * - * Output: Return ERROR or SUCCESS. - * - ****************************************************************************/ - -int read_ulong(const char *file_path, unsigned long *valuep) -{ - char file_data[MAX_LINE_SIZE]; - int ret_val; - - if ((ret_val = read_string(file_path, file_data,sizeof(file_data))) == SUCCESS){ - *valuep = strtoul(file_data,NULL,10); - } - return ret_val; -} - -/***************************************************************************** - * Function: read_counter64 - * - * Description: Read counter64 values from lproc and copy to the location - * pointed by input parameter. - * - * Input: file path, and pointer for data to be copied - * - * Output: Return ERROR or SUCCESS. 
- * - ****************************************************************************/ - -int read_counter64(const char *file_path, counter64 *c64,int factor) -{ - char file_data[MAX_LINE_SIZE]; - int ret_val; - unsigned long long tmp = 0; - - if ((ret_val = read_string(file_path, file_data,sizeof(file_data))) == SUCCESS) { - tmp = atoll(file_data) * factor; - c64->low = (ulong) (0x0FFFFFFFF & tmp); - tmp >>= 32; /* Shift right by 4 bytes */ - c64->high = (ulong) (0x0FFFFFFFF & tmp); - } - return ret_val; -} - -/***************************************************************************** - * Function: get_nth_entry_from_list - * - * Description: Find the n'th entry from a null terminated list of string - * - * Input: dir_list - the list - * num - the number of elements in the list - * index - the index we are looking for - * - * Output: Return NULL on failure, or the string name on success. - * - ****************************************************************************/ - -const char *get_nth_entry_from_list(const char* dir_list,int num,int index) -{ - int i; - int cur_ptr = 0; - for(i=0;i<num;i++){ - - /* - * if we've reached the end of the list for some reason - * because num was wrong then stop processing - */ - if( *(dir_list+cur_ptr) == 0) - break; - - /* If we've found the right one */ - if( i == index ) - return dir_list+cur_ptr; - - /* Move to the next one*/ - cur_ptr += strlen(dir_list + cur_ptr)+1; - } - return NULL; -} - -/***************************************************************************** - * Function: report - * - * Description: This function used to report error msg to stderr and log into - * log file(default file:/var/log/snmpd.log) when agent is started with - * debug option -Dlsnmpd - * Input: format string and variable arguments. - * Output: void - ****************************************************************************/ - -void report(const char *fmt, ...) 
-{ - char buf[1024]; - - va_list arg_list; - va_start(arg_list, fmt); - vsprintf(buf, fmt, arg_list); - va_end(arg_list); - - DEBUGMSGTL(("lsnmpd", "%s\n", buf)); - fprintf(stderr, "%s\n", buf); - return; -} - - - -/************************************************************************** - * Function: oid_table_ulong_handler - * - * Description: Fetch a unsigned long from the given location. - * Setup var_len, and return a pointer to the data. - * - * Input: file_path, and var_len pointer - * - * Output: NULL on failure, or pointer to data - * - **************************************************************************/ - -unsigned char* - oid_table_ulong_handler( - const char* file_path, - size_t *var_len) -{ - static unsigned long ulong_ret; - if (SUCCESS != read_ulong(file_path,&ulong_ret)) - return NULL; - *var_len = sizeof(ulong_ret); - return (unsigned char *) &ulong_ret; -} - -/************************************************************************** - * Function: oid_table_c64_handler - * - * Description: Fetch a counter64 from the given location. - * Setup var_len, and return a pointer to the data. - * - * Input: file_path, and var_len pointer - * - * Output: NULL on failure, or pointer to data - * - **************************************************************************/ - -unsigned char* oid_table_c64_handler(const char* file_path,size_t *var_len) -{ - static counter64 c64; - if (SUCCESS != read_counter64(file_path,&c64,1)) - return NULL; - *var_len = sizeof(c64); - return (unsigned char *) &c64; -} - -/************************************************************************** - * Function: oid_table_c64_kb_handler - * - * Description: Fetch a counter64 from the given location. - * Setup var_len, and return a pointer to the data. - * Different than oid_table_c64_handler in that - * the original value is multiplied by 1024 before converting - * to a counter64. (e.g. 
turn KB into a Byte scaled value) - * - * Input: file_path, and var_len pointer - * - * Output: NULL on failure, or pointer to data - * - **************************************************************************/ - -unsigned char* oid_table_c64_kb_handler(const char* file_path,size_t *var_len) -{ - static counter64 c64; - /* scale by factor of 1024*/ - if (SUCCESS != read_counter64(file_path,&c64,1024)) - return NULL; - *var_len = sizeof(c64); - return (unsigned char *) &c64; -} - -/************************************************************************** - * Function: oid_table_obj_name_handler - * - * Description: Just copy the file_path and return as the output value. - * - * Input: file_path, and var_len pointer - * - * Output: NULL on failure, or pointer to data - * - **************************************************************************/ - -unsigned char* - oid_table_obj_name_handler( - const char* file_path, - size_t *var_len) -{ - static unsigned char string[SPRINT_MAX_LEN]; - *var_len = strlen(file_path); - *var_len = MIN_LEN(*var_len, sizeof(string)); - memcpy(string, file_path, *var_len); - return (unsigned char *) string; -} - -/************************************************************************** - * Function: oid_table_string_handler - * - * Description: Fetch a string from the given location. - * Setup var_len, and return a pointer to the data. 
- * - * Input: file_path, and var_len pointer - * - * Output: NULL on failure, or pointer to data - * - **************************************************************************/ - -unsigned char* - oid_table_string_handler( - const char* file_path, - size_t *var_len) -{ - static unsigned char string[SPRINT_MAX_LEN]; - if( SUCCESS != read_string(file_path, string,sizeof(string))) - return NULL; - *var_len = strlen(string); - return (unsigned char *) string; -} - - -/************************************************************************** - * Function: oid_table_is_directory_handler - * - * Description: Determine if the file_path is a directory. - * Setup a boolean return value. - * Setup var_len, and return a pointer to the data. - * - * Input: file_path, and var_len pointer - * - * Output: NULL on failure, or pointer to data - * - **************************************************************************/ - -unsigned char* - oid_table_is_directory_handler( - const char* file_path, - size_t *var_len) -{ - static long long_ret; - long_ret = is_directory(file_path); - *var_len = sizeof(long_ret); - return (unsigned char *) &long_ret; -} - -/************************************************************************** - * Function: var_genericTable - * - * Description: Handle Table driven OID processing - * - **************************************************************************/ - -unsigned char * -var_genericTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method, - const char *path, - struct oid_table *ptable) -{ - char *dir_list; - uint32_t num; - int deviceindex; - unsigned char *ret_val = NULL; - int i=0; - const char* obj_name; - - - /* - * Get the list of file. 
If there are no elements - * return nothing - */ - if( 0 == (dir_list = get_file_list(path, DIR_TYPE, &num))) - return NULL; - - /* - * Setup the table - */ - if (header_simple_table(vp,name,length,exact,var_len,write_method, num) - == MATCH_FAILED ) - goto cleanup_and_exit; - - /* - * The number of the device we're looking at - */ - deviceindex = name[*length - 1] - 1; - - /* - * If we couldn't find this element - * something must have recently changed return - * nothing - */ - if(deviceindex >= num){ - report("deviceindex=%d exceeds number of elements=%d",deviceindex,num); - goto cleanup_and_exit; - } - - /* - * Fetch the object name from the list - */ - obj_name = get_nth_entry_from_list(dir_list,num,deviceindex); - if(obj_name == NULL){ - /* - * Note this should never really happen because we check deviceindex >=num - * above. And dir_list should be consitent with num - * but just in case... - */ - report("object name not found in list",deviceindex,num); - goto cleanup_and_exit; - } - - /* - * Find the matching magic - or the end of the list - */ - while(ptable[i].magic != vp->magic && ptable[i].magic != 0) - i++; - - /* - * If we didn't find a matching entry return - */ - if(ptable[i].magic==0) - goto cleanup_and_exit; - - /* - * If the name is NULL is a special case and - * just just pass the obj_name as the file_path - * otherwise we create a file path from the given components - */ - if(ptable[i].name != 0){ - char file_path[MAX_PATH_SIZE]; - sprintf(file_path, "%s%s/%s",path,obj_name,ptable[i].name); - ret_val = ptable[i].fhandler(file_path,var_len); - } - else - ret_val = ptable[i].fhandler(obj_name,var_len); - -cleanup_and_exit: - free(dir_list); - return ret_val; -}; - diff --git a/snmp/lustre-snmp-util.h b/snmp/lustre-snmp-util.h deleted file mode 100644 index 087e8cc1dcd91721ce9eda1e62b8591b87c21487..0000000000000000000000000000000000000000 --- a/snmp/lustre-snmp-util.h +++ /dev/null @@ -1,197 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; 
indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2005 Cluster File Systems, Inc. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef LUSTRE_SNMP_UTIL_H -#define LUSTRE_SNMP_UTIL_H - -/* - * Definitions of magic values - */ - -#define SYSVERSION 20 -#define SYSKERNELVERSION 21 -#define SYSHEALTHCHECK 22 -#define SYSSTATUS 23 - -#define OSDNUMBER 30 -#define OSDUUID 31 -#define OSDCOMMONNAME 32 -#define OSDCAPACITY 33 -#define OSDFREECAPACITY 34 -#define OSDOBJECTS 35 -#define OSDFREEOBJECTS 36 - -#define OSCNUMBER 40 -#define OSCUUID 41 -#define OSCCOMMONNAME 42 -#define OSCOSTSERVERUUID 43 -#define OSCCAPACITY 44 -#define OSCFREECAPACITY 45 -#define OSCOBJECTS 46 -#define OSCFREEOBJECTS 47 - -#define MDDNUMBER 50 -#define MDDUUID 51 -#define MDDCOMMONNAME 52 -#define MDDCAPACITY 53 -#define MDDFREECAPACITY 54 -#define MDDFILES 55 -#define MDDFREEFILES 56 - -#define MDCNUMBER 60 -#define MDCUUID 61 -#define MDCCOMMONNAME 62 -#define MDCMDSSERVERUUID 63 -#define MDCCAPACITY 64 -#define MDCFREECAPACITY 65 -#define MDCOBJECTS 66 -#define MDCFREEOBJECTS 67 - -#define CLIMOUNTNUMBER 70 -#define CLIUUID 71 -#define CLICOMMONNAME 72 -#define CLIMDCUUID 73 -#define CLIMDCCOMMONNAME 74 -#define CLIUSESLOV 75 -#define CLILOVUUID 76 -#define 
CLILOVCOMMONNAME 77 - -#define LOVNUMBER 80 -#define LOVUUID 81 -#define LOVCOMMONNAME 82 -#define LOVNUMOBD 83 -#define LOVNUMACTIVEOBD 84 -#define LOVCAPACITY 85 -#define LOVFREECAPACITY 86 -#define LOVFILES 87 -#define LOVFREEFILES 88 -#define LOVSTRIPECOUNT 89 -#define LOVSTRIPEOFFSET 90 -#define LOVSTRIPESIZE 91 -#define LOVSTRIPETYPE 92 - -#define LDLMNUMBER 100 -#define LDLMNAMESPACE 101 -#define LDLMLOCKCOUNT 102 -#define LDLMUNUSEDLOCKCOUNT 103 -#define LDLMRESOURCECOUNT 104 - -/* Defining the proc paths for Lustre file system */ -#define LUSTRE_PATH "/proc/fs/lustre/" -#define OSD_PATH LUSTRE_PATH "obdfilter/" -#define OSC_PATH LUSTRE_PATH "osc/" -#define MDS_PATH LUSTRE_PATH "mds/" -#define MDC_PATH LUSTRE_PATH "mdc/" -#define CLIENT_PATH LUSTRE_PATH "llite/" -#define LOV_PATH LUSTRE_PATH "lov/" -#define LDLM_PATH LUSTRE_PATH "ldlm/namespaces/" - -/* Common procfs file entries that are refrenced in mulitple locations*/ -#define FILENAME_SYSHEALTHCHECK "health_check" -#define FILENAME_SYS_STATUS "/var/lustre/sysStatus" - -#define FILENAME_NUM_REF "num_refs" -#define FILENAME_UUID "uuid" -#define FILENAME_COMMON_NAME "common_name" -#define FILENAME_KBYTES_TOTAL "kbytestotal" -#define FILENAME_KBYTES_FREE "kbytesfree" -#define FILENAME_FILES_TOTAL "filestotal" -#define FILENAME_FILES_FREE "filesfree" - -/* strings which the file /var/lustre/sysStatus can hold */ -#define STR_ONLINE "online" -#define STR_ONLINE_PENDING "online pending" -#define STR_OFFLINE "offline" -#define STR_OFFLINE_PENDING "offline pending" - - -/* Script required for starting/stopping lustre services */ -#define LUSTRE_SERVICE "/etc/init.d/lustre" - -#define MIN_LEN(val1,val2) (((val1)>(val2))?(val2):(val1)) - -/* The max size of a lustre procfs path name*/ -#define MAX_PATH_SIZE 512 - -/* The max size of a string read from procfs */ -#define MAX_LINE_SIZE 512 - -/* Types passed to get_file_list() */ -#define DIR_TYPE 1 -#define FILE_TYPE 0 - -/* Defining return values */ -#define 
SUCCESS 0 -#define ERROR -1 - -typedef struct counter64 counter64; - -typedef enum { - ONLINE = 1, - OFFLINE, - ONLINE_PENDING, - OFFLINE_PENDING, - RESTART -} lustre_sysstatus; - -/* File operation related functions */ -char *get_file_list(const char *dirname, int file_type, uint32_t *count); -extern int is_directory(const char *filename); -extern int read_string(const char *filepath, char *lustre_var,size_t var_size); -int read_counter64(const char *file_path, counter64 *c64,int factor); -int read_ulong(const char *file_path,unsigned long* valuep); - -/* Start/Stop/Restart Lustre Services */ -extern void lustrefs_ctrl(int command); -extern int get_sysstatus(); - -extern void report(const char *fmt, ...); - -/* Table Driven SNMP OID Handler support*/ -typedef unsigned char* (*f_oid_handler_t)( - const char* file_path, - size_t *var_len); - -struct oid_table -{ - int magic; /*The magic number*/ - const char *name; /*The procfs name*/ - f_oid_handler_t fhandler; /*The handler */ -}; - -unsigned char* oid_table_ulong_handler(const char* file_path,size_t *var_len); -unsigned char* oid_table_c64_handler(const char* file_path,size_t *var_len); -unsigned char* oid_table_c64_kb_handler(const char* file_path,size_t *var_len); -unsigned char* oid_table_obj_name_handler(const char* file_path,size_t *var_len); -unsigned char* oid_table_string_handler(const char* file_path,size_t *var_len); -unsigned char* oid_table_is_directory_handler(const char* file_path,size_t *var_len); -unsigned char * - var_genericTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method, - const char *path, - struct oid_table *ptable); - -#endif /* LUSTRE_SNMP_UTIL_H */ diff --git a/snmp/lustre-snmp.c b/snmp/lustre-snmp.c deleted file mode 100644 index 0de2c4dda122a37d321e6483cc86fbcca3751769..0000000000000000000000000000000000000000 --- a/snmp/lustre-snmp.c +++ /dev/null @@ -1,601 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; 
indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2005 Cluster File Systems, Inc. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <net-snmp/net-snmp-config.h> -#include <net-snmp/net-snmp-includes.h> -#include <net-snmp/agent/net-snmp-agent-includes.h> -#include <net-snmp/utilities.h> -#include <pthread.h> -#include "lustre-snmp.h" - -/* - * clusterFileSystems_variables_oid: - * this is the top level oid that we want to register under. This - * is essentially a prefix, with the suffix appearing in the - * variable below. - */ - - -oid clusterFileSystems_variables_oid[] = { 1,3,6,1,4,1,13140 }; - - -/* - * variable7 clusterFileSystems_variables: - * this variable defines function callbacks and type return information - * for the clusterFileSystems mib section - */ - - -struct variable7 clusterFileSystems_variables[] = { -/* magic number , variable type , ro/rw , callback fn , L, oidsuffix */ - - /* sytemInformation 2.1.1. 
*/ - { SYSVERSION , ASN_OCTET_STR , RONLY , var_clusterFileSystems, 4, { 2,1,1,1 } }, - { SYSKERNELVERSION , ASN_OCTET_STR , RONLY , var_clusterFileSystems, 4, { 2,1,1,2 } }, - { SYSHEALTHCHECK , ASN_OCTET_STR , RONLY , var_clusterFileSystems, 4, { 2,1,1,3 } }, - { SYSSTATUS , ASN_INTEGER , RWRITE, var_clusterFileSystems, 4, { 2,1,1,4 } }, - - /* objectStorageTargets 2.1.2 */ - { OSDNUMBER , ASN_UNSIGNED , RONLY , var_clusterFileSystems, 4, { 2,1,2,1 } }, - - /* objectStorageTargets.osdTable.osdEntry 2.1.2.2.1 */ - { OSDUUID , ASN_OCTET_STR , RONLY , var_osdTable, 6, { 2,1,2,2,1,2 } }, - { OSDCOMMONNAME , ASN_OCTET_STR , RONLY , var_osdTable, 6, { 2,1,2,2,1,3 } }, - { OSDCAPACITY , ASN_COUNTER64 , RONLY , var_osdTable, 6, { 2,1,2,2,1,4 } }, - { OSDFREECAPACITY , ASN_COUNTER64 , RONLY , var_osdTable, 6, { 2,1,2,2,1,5 } }, - { OSDOBJECTS , ASN_COUNTER64 , RONLY , var_osdTable, 6, { 2,1,2,2,1,6 } }, - { OSDFREEOBJECTS , ASN_COUNTER64 , RONLY , var_osdTable, 6, { 2,1,2,2,1,7 } }, - - /* objectStorageClients 2.1.3 */ - { OSCNUMBER , ASN_UNSIGNED , RONLY , var_clusterFileSystems, 4, { 2,1,3,1 } }, - - /* objectStorageClients.oscTable.oscEntry 2.1.3.2.1 */ - { OSCUUID , ASN_OCTET_STR , RONLY , var_oscTable, 6, { 2,1,3,2,1,2 } }, - { OSCCOMMONNAME , ASN_OCTET_STR , RONLY , var_oscTable, 6, { 2,1,3,2,1,3 } }, - { OSCOSTSERVERUUID , ASN_OCTET_STR , RONLY , var_oscTable, 6, { 2,1,3,2,1,4 } }, - { OSCCAPACITY , ASN_COUNTER64 , RONLY , var_oscTable, 6, { 2,1,3,2,1,5 } }, - { OSCFREECAPACITY , ASN_COUNTER64 , RONLY , var_oscTable, 6, { 2,1,3,2,1,6 } }, - { OSCOBJECTS , ASN_COUNTER64 , RONLY , var_oscTable, 6, { 2,1,3,2,1,7 } }, - { OSCFREEOBJECTS , ASN_COUNTER64 , RONLY , var_oscTable, 6, { 2,1,3,2,1,8 } }, - - - /* metaDataServers 2.1.4 */ - { MDDNUMBER , ASN_UNSIGNED , RONLY , var_clusterFileSystems, 4, { 2,1,4,1 } }, - - /* metaDataServers.mddTable.mddEntry 2.1.4.2.1 */ - { MDDUUID , ASN_OCTET_STR , RONLY , var_mdsTable, 6, { 2,1,4,2,1,2 } }, - { MDDCOMMONNAME , ASN_OCTET_STR 
, RONLY , var_mdsTable, 6, { 2,1,4,2,1,3 } }, - { MDDCAPACITY , ASN_COUNTER64 , RONLY , var_mdsTable, 6, { 2,1,4,2,1,4 } }, - { MDDFREECAPACITY , ASN_COUNTER64 , RONLY , var_mdsTable, 6, { 2,1,4,2,1,5 } }, - { MDDFILES , ASN_COUNTER64 , RONLY , var_mdsTable, 6, { 2,1,4,2,1,6 } }, - { MDDFREEFILES , ASN_COUNTER64 , RONLY , var_mdsTable, 6, { 2,1,4,2,1,7 } }, - - /* metaDataClients 2.1.5 */ - { MDCNUMBER , ASN_UNSIGNED , RONLY , var_clusterFileSystems, 4, { 2,1,5,1 } }, - - /* metaDataClients.mdcTable.mdcEntry 2.1.5.2.1 */ - { MDCUUID , ASN_OCTET_STR , RONLY , var_mdcTable, 6, { 2,1,5,2,1,2 } }, - { MDCCOMMONNAME , ASN_OCTET_STR , RONLY , var_mdcTable, 6, { 2,1,5,2,1,3 } }, - { MDCMDSSERVERUUID , ASN_OCTET_STR , RONLY , var_mdcTable, 6, { 2,1,5,2,1,4 } }, - { MDCCAPACITY , ASN_COUNTER64 , RONLY , var_mdcTable, 6, { 2,1,5,2,1,5 } }, - { MDCFREECAPACITY , ASN_COUNTER64 , RONLY , var_mdcTable, 6, { 2,1,5,2,1,6 } }, - { MDCOBJECTS , ASN_COUNTER64 , RONLY , var_mdcTable, 6, { 2,1,5,2,1,7 } }, - { MDCFREEOBJECTS , ASN_COUNTER64 , RONLY , var_mdcTable, 6, { 2,1,5,2,1,8 } }, - - /* lustreClients 2.1.6 */ - { CLIMOUNTNUMBER , ASN_UNSIGNED , RONLY , var_clusterFileSystems, 4, { 2,1,6,1 } }, - - /* lustreClients.cliMountTable.cliMountEntry 2.1.6.2.1 */ - { CLIUUID , ASN_OCTET_STR , RONLY , var_cliTable, 6, { 2,1,6,2,1,2 } }, - { CLICOMMONNAME , ASN_OCTET_STR , RONLY , var_cliTable, 6, { 2,1,6,2,1,3 } }, - { CLIMDCUUID , ASN_OCTET_STR , RONLY , var_cliTable, 6, { 2,1,6,2,1,4 } }, - { CLIMDCCOMMONNAME , ASN_OCTET_STR , RONLY , var_cliTable, 6, { 2,1,6,2,1,5 } }, - { CLIUSESLOV , ASN_INTEGER , RONLY , var_cliTable, 6, { 2,1,6,2,1,6 } }, - { CLILOVUUID , ASN_OCTET_STR , RONLY , var_cliTable, 6, { 2,1,6,2,1,7 } }, - { CLILOVCOMMONNAME , ASN_OCTET_STR , RONLY , var_cliTable, 6, { 2,1,6,2,1,8 } }, - - /* logicalObjectVolume 2.1.7 */ - { LOVNUMBER , ASN_UNSIGNED , RONLY , var_clusterFileSystems, 4, { 2,1,7,1 } }, - - /* logicalObjectVolume.osdTable.lovTable 2.1.2.2.1 */ - { LOVUUID , 
ASN_OCTET_STR , RONLY , var_lovTable, 6, { 2,1,7,2,1,2 } }, - { LOVCOMMONNAME , ASN_OCTET_STR , RONLY , var_lovTable, 6, { 2,1,7,2,1,3 } }, - { LOVNUMOBD , ASN_UNSIGNED , RONLY , var_lovTable, 6, { 2,1,7,2,1,4 } }, - { LOVNUMACTIVEOBD , ASN_UNSIGNED , RONLY , var_lovTable, 6, { 2,1,7,2,1,5 } }, - { LOVCAPACITY , ASN_COUNTER64 , RONLY , var_lovTable, 6, { 2,1,7,2,1,6 } }, - { LOVFREECAPACITY , ASN_COUNTER64 , RONLY , var_lovTable, 6, { 2,1,7,2,1,7 } }, - { LOVFILES , ASN_COUNTER64 , RONLY , var_lovTable, 6, { 2,1,7,2,1,8 } }, - { LOVFREEFILES , ASN_COUNTER64 , RONLY , var_lovTable, 6, { 2,1,7,2,1,9 } }, - { LOVSTRIPECOUNT , ASN_UNSIGNED , RONLY , var_lovTable, 6, { 2,1,7,2,1,10} }, - { LOVSTRIPEOFFSET , ASN_UNSIGNED , RONLY , var_lovTable, 6, { 2,1,7,2,1,11} }, - { LOVSTRIPESIZE , ASN_UNSIGNED , RONLY , var_lovTable, 6, { 2,1,7,2,1,12} }, - { LOVSTRIPETYPE , ASN_UNSIGNED , RONLY , var_lovTable, 6, { 2,1,7,2,1,13} }, - - /* lustreLDLM 2.1.8 */ - { LDLMNUMBER , ASN_UNSIGNED , RONLY , var_clusterFileSystems, 4, { 2,1,8,1 } }, - - /* lustreLDLM.ldlmTable.ldlmEntry 2.1.8.2.1 */ - { LDLMNAMESPACE , ASN_OCTET_STR , RONLY , var_ldlmTable, 6, { 2,1,8,2,1,2 } }, - { LDLMLOCKCOUNT , ASN_UNSIGNED , RONLY , var_ldlmTable, 6, { 2,1,8,2,1,3 } }, - { LDLMUNUSEDLOCKCOUNT , ASN_UNSIGNED , RONLY , var_ldlmTable, 6, { 2,1,8,2,1,4 } }, - { LDLMRESOURCECOUNT , ASN_UNSIGNED , RONLY , var_ldlmTable, 6, { 2,1,8,2,1,5 } }, - -}; - -/***************************************************************************** - * Function: init_cfsNetSNMPPlugin - * - * Description: Called when the agent starts up - * - * Input: void - * - * Output: None - * - ****************************************************************************/ - -void init_lustresnmp(void) { - - /* register ourselves with the agent to handle our mib tree */ - REGISTER_MIB("clusterFileSystems", clusterFileSystems_variables, variable7, - clusterFileSystems_variables_oid); - - initilize_trap_handler(); - - DEBUGMSGTL(("lsnmpd", "%s %s 
\n", __FUNCTION__, "Initialization Done")); -} - -/***************************************************************************** - * Function: deinit_cfsNetSNMPPlugin - * - * Description: Called when the agent terminates up - * - * Input: void - * - * Output: None - * - ****************************************************************************/ - -void deinit_lustresnmp(void) { - - /* deregister ourselves with the agent */ - unregister_mib(clusterFileSystems_variables_oid, - sizeof(clusterFileSystems_variables_oid)/sizeof(clusterFileSystems_variables_oid)); - - terminate_trap_handler(); - - DEBUGMSGTL(("lsnmpd", "%s %s \n", __FUNCTION__, "Termination Done")); -} - -/***************************************************************************** - * Function: var_clusterFileSystems - * - ****************************************************************************/ -unsigned char * -var_clusterFileSystems(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method) -{ - - - /* variables we may use later */ - static long long_ret; - static u_long ulong_ret; - static unsigned char string[SPRINT_MAX_LEN]; - char file_path[MAX_PATH_SIZE]; - uint32_t num; - char *dir_list; - - if (header_generic(vp,name,length,exact,var_len,write_method) - == MATCH_FAILED ) - return NULL; - - - /* - * this is where we do the value assignments for the mib results. 
- */ - switch(vp->magic) { - - case SYSVERSION: - sprintf(file_path, "%s%s", LUSTRE_PATH,"version"); - if( SUCCESS != read_string(file_path, string,sizeof(string))) - return NULL; - *var_len = strlen(string); - return (unsigned char *) string; - - case SYSKERNELVERSION: - sprintf(file_path, "%s%s", LUSTRE_PATH,"kernel_version"); - if( SUCCESS != read_string(file_path, string,sizeof(string))) - return NULL; - *var_len = strlen(string); - return (unsigned char *) string; - - case SYSHEALTHCHECK: - sprintf(file_path, "%s%s", LUSTRE_PATH,FILENAME_SYSHEALTHCHECK); - if( SUCCESS != read_string(file_path, string,sizeof(string))) - return NULL; - *var_len = strlen(string); - return (unsigned char *) string; - - case SYSSTATUS: - *write_method = write_sysStatus; - long_ret = (long) get_sysstatus(); - if (long_ret != ERROR) - return (unsigned char *) &long_ret; - return NULL; - - case OSDNUMBER: - if( 0 == (dir_list = get_file_list(OSD_PATH, DIR_TYPE, &num))) - return NULL; - DEBUGMSGTL(("lsnmpd","num(%s)=%d\n",OSD_PATH,num)); - ulong_ret = num; - free(dir_list); - return (unsigned char *) &ulong_ret; - - case OSCNUMBER: - if( 0 == (dir_list = get_file_list(OSC_PATH, DIR_TYPE, &num))) - return NULL; - DEBUGMSGTL(("lsnmpd","num(%s)=%d\n",OSC_PATH,num)); - ulong_ret = num; - free(dir_list); - return (unsigned char *) &ulong_ret; - - case MDDNUMBER: - if( 0 == (dir_list = get_file_list(MDS_PATH, DIR_TYPE, &num))) - return NULL; - DEBUGMSGTL(("lsnmpd","num(%s)=%d\n",MDS_PATH,num)); - ulong_ret = num; - free(dir_list); - return (unsigned char *) &ulong_ret; - - case MDCNUMBER: - if( 0 == (dir_list = get_file_list(MDC_PATH, DIR_TYPE, &num))) - return NULL; - DEBUGMSGTL(("lsnmpd","num(%s)=%d\n",MDC_PATH,num)); - ulong_ret = num; - free(dir_list); - return (unsigned char *) &ulong_ret; - - case CLIMOUNTNUMBER: - if( 0 == (dir_list = get_file_list(CLIENT_PATH, DIR_TYPE, &num))) - return NULL; - DEBUGMSGTL(("lsnmpd","num(%s)=%d\n",CLIENT_PATH,num)); - ulong_ret = num; - 
free(dir_list); - return (unsigned char *) &ulong_ret; - - case LOVNUMBER: - if( 0 == (dir_list = get_file_list(LOV_PATH, DIR_TYPE, &num))) - return NULL; - DEBUGMSGTL(("lsnmpd","num(%s)=%d\n",LOV_PATH,num)); - ulong_ret = num; - free(dir_list); - return (unsigned char *) &ulong_ret; - - case LDLMNUMBER: - if( 0 == (dir_list = get_file_list(LDLM_PATH, DIR_TYPE, &num))) - return NULL; - DEBUGMSGTL(("lsnmpd","num(%s)=%d\n",LDLM_PATH,num)); - ulong_ret = num; - free(dir_list); - return (unsigned char *) &ulong_ret; - - default: - ERROR_MSG(""); - } - return NULL; -} - -struct oid_table osd_table[] = -{ - { OSDUUID,FILENAME_UUID,oid_table_string_handler}, - { OSDCOMMONNAME,0,oid_table_obj_name_handler}, - { OSDCAPACITY,FILENAME_KBYTES_TOTAL, oid_table_c64_kb_handler}, - { OSDFREECAPACITY,FILENAME_KBYTES_FREE, oid_table_c64_kb_handler}, - { OSDOBJECTS,FILENAME_FILES_TOTAL, oid_table_c64_kb_handler}, - { OSDFREEOBJECTS,FILENAME_FILES_FREE, oid_table_c64_kb_handler}, - { 0,0,0 } /*End of table*/ -}; - - -/***************************************************************************** - * Function: var_osdTable - * - ****************************************************************************/ -unsigned char * -var_osdTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method) -{ - return var_genericTable(vp,name,length,exact,var_len,write_method, - OSD_PATH,osd_table); -} - -struct oid_table osc_table[] = -{ - { OSCUUID,FILENAME_UUID,oid_table_string_handler}, - { OSCCOMMONNAME,0,oid_table_obj_name_handler}, - { OSCOSTSERVERUUID,"ost_server_uuid",oid_table_string_handler}, - { OSCCAPACITY,FILENAME_KBYTES_TOTAL, oid_table_c64_kb_handler}, - { OSCFREECAPACITY,FILENAME_KBYTES_FREE, oid_table_c64_kb_handler}, - { OSCOBJECTS,FILENAME_FILES_TOTAL, oid_table_c64_kb_handler}, - { OSCFREEOBJECTS,FILENAME_FILES_FREE, oid_table_c64_kb_handler}, - { 0,0,0 } /*End of table*/ -}; - 
-/***************************************************************************** - * Function: var_oscTable - * - ****************************************************************************/ -unsigned char * -var_oscTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method) -{ - return var_genericTable(vp,name,length,exact,var_len,write_method, - OSC_PATH,osc_table); -} - -struct oid_table mds_table[] = -{ - { MDDUUID,FILENAME_UUID,oid_table_string_handler}, - { MDDCOMMONNAME,0,oid_table_obj_name_handler}, - { MDDCAPACITY,FILENAME_KBYTES_TOTAL, oid_table_c64_kb_handler}, - { MDDFREECAPACITY,FILENAME_KBYTES_FREE, oid_table_c64_kb_handler}, - { MDDFILES,FILENAME_FILES_TOTAL, oid_table_c64_kb_handler}, - { MDDFREEFILES,FILENAME_FILES_FREE, oid_table_c64_kb_handler}, - { 0,0,0 } /*End of table*/ -}; - -/***************************************************************************** - * Function: var_mdsTable - * - ****************************************************************************/ -unsigned char * -var_mdsTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method) -{ - return var_genericTable(vp,name,length,exact,var_len,write_method, - MDS_PATH,mds_table); -} - -struct oid_table mdc_table[] = -{ - { MDCUUID,FILENAME_UUID,oid_table_string_handler}, - { MDCCOMMONNAME,0,oid_table_obj_name_handler}, - { MDCMDSSERVERUUID,"mds_server_uuid",oid_table_string_handler}, - { MDCCAPACITY,FILENAME_KBYTES_TOTAL, oid_table_c64_kb_handler}, - { MDCFREECAPACITY,FILENAME_KBYTES_FREE, oid_table_c64_kb_handler}, - { MDCOBJECTS,FILENAME_FILES_TOTAL, oid_table_c64_kb_handler}, - { MDCFREEOBJECTS,FILENAME_FILES_FREE, oid_table_c64_kb_handler}, - { 0,0,0 } /*End of table*/ -}; - - -/***************************************************************************** - * Function: var_mdcTable - * - 
****************************************************************************/ -unsigned char * -var_mdcTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method) -{ - return var_genericTable(vp,name,length,exact,var_len,write_method, - MDC_PATH,mdc_table); -} - - -struct oid_table cli_table[] = -{ - { CLIUUID,FILENAME_UUID,oid_table_string_handler}, - { CLICOMMONNAME,0,oid_table_obj_name_handler}, - { CLIMDCUUID,"mdc/" FILENAME_UUID,oid_table_string_handler}, - { CLIMDCCOMMONNAME,"mdc/" FILENAME_COMMON_NAME,oid_table_string_handler}, - { CLIUSESLOV,"lov/",oid_table_is_directory_handler}, - { CLILOVUUID,"lov/" FILENAME_UUID,oid_table_string_handler}, - { CLILOVCOMMONNAME,"lov/" FILENAME_COMMON_NAME,oid_table_string_handler}, - { 0,0,0 } /*End of table*/ -}; - -/***************************************************************************** - * Function: var_cliTable - * - ****************************************************************************/ -unsigned char * -var_cliTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method) -{ - return var_genericTable(vp,name,length,exact,var_len,write_method, - CLIENT_PATH,cli_table); -} - - -struct oid_table lov_table[] = -{ - { LOVUUID,FILENAME_UUID,oid_table_string_handler}, - { LOVCOMMONNAME,0,oid_table_obj_name_handler}, - { LOVNUMOBD,"numobd", oid_table_ulong_handler}, - { LOVNUMACTIVEOBD,"activeobd", oid_table_ulong_handler}, - { LOVCAPACITY,FILENAME_KBYTES_TOTAL, oid_table_c64_kb_handler}, - { LOVFREECAPACITY,FILENAME_KBYTES_FREE, oid_table_c64_kb_handler}, - { LOVFILES,FILENAME_FILES_TOTAL, oid_table_c64_kb_handler}, - { LOVFREEFILES,FILENAME_FILES_FREE, oid_table_c64_kb_handler}, - { LOVSTRIPECOUNT,"stripecount", oid_table_ulong_handler}, - { LOVSTRIPEOFFSET,"stripeoffset", oid_table_ulong_handler}, - { LOVSTRIPESIZE,"stripesize", oid_table_ulong_handler}, - { LOVSTRIPETYPE,"stripetype", 
oid_table_ulong_handler}, - { 0,0,0 } /*End of table*/ -}; - - -/***************************************************************************** - * Function: var_lovTable - * - ****************************************************************************/ -unsigned char * -var_lovTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method) -{ - return var_genericTable(vp,name,length,exact,var_len,write_method, - LOV_PATH,lov_table); -} - -struct oid_table ldlm_table[] = -{ - { LDLMNAMESPACE,0,oid_table_obj_name_handler}, - { LDLMLOCKCOUNT,"lock_count", oid_table_ulong_handler}, - { LDLMUNUSEDLOCKCOUNT,"lock_unused_count", oid_table_ulong_handler}, - { LDLMRESOURCECOUNT,"resource_count", oid_table_ulong_handler}, - { 0,0,0 } /*End of table*/ -}; - - -/***************************************************************************** - * Function: var_ldlmTable - * - ****************************************************************************/ -unsigned char * -var_ldlmTable(struct variable *vp, - oid *name, - size_t *length, - int exact, - size_t *var_len, - WriteMethod **write_method) -{ - return var_genericTable(vp,name,length,exact,var_len,write_method, - LDLM_PATH,ldlm_table); -} - - -/***************************************************************************** - * Function: write_sysStatus - * - ****************************************************************************/ -int -write_sysStatus(int action, - u_char *var_val, - u_char var_val_type, - size_t var_val_len, - u_char *statP, - oid *name, - size_t name_len) -{ - static long *long_ret; - int size; - int pid, new_value; - - - - switch ( action ) { - case RESERVE1: - if (var_val_type != ASN_INTEGER){ - fprintf(stderr, "write to sysStatus not ASN_INTEGER\n"); - return SNMP_ERR_WRONGTYPE; - } - if (var_val_len > sizeof(long_ret)){ - fprintf(stderr,"write to sysStatus: bad length\n"); - return SNMP_ERR_WRONGLENGTH; - } - if ((*var_val != ONLINE) && - 
(*var_val != OFFLINE) && - (*var_val != RESTART)){ - report("%s %s:line %d %s", __FILE__, __FUNCTION__, __LINE__, - "sysStatus value is invalid."); - return SNMP_ERR_WRONGVALUE; - } - break; - - - case RESERVE2: - size = var_val_len; - long_ret = (long *) var_val; - - - break; - - - case FREE: - /* Release any resources that have been allocated */ - break; - - - case ACTION: - /* The variable has been stored in long_ret for - you to use, and you have just been asked to do something with - it. Note that anything done here must be reversable in the UNDO case */ - new_value = *(int *) var_val; - switch (new_value) { - case ONLINE: - lustrefs_ctrl(ONLINE); - break; - - case OFFLINE: - lustrefs_ctrl(OFFLINE); - break; - - case RESTART: - lustrefs_ctrl(RESTART); - break; - - default: - break; - } - break; - - - case UNDO: - /* Back out any changes made in the ACTION case */ - break; - - - case COMMIT: - /* Things are working well, so it's now safe to make the change - permanently. Make sure that anything done here can't fail! */ - break; - } - return SNMP_ERR_NOERROR; -} - diff --git a/snmp/lustre-snmp.h b/snmp/lustre-snmp.h deleted file mode 100644 index c4c10e0b4452b02cb9e584c748b44d0b22aa8eef..0000000000000000000000000000000000000000 --- a/snmp/lustre-snmp.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2005 Cluster File Systems, Inc. - * Author: PJ Kirner <pjkirner@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef LUSTRE_SNMP_H -#define LUSTRE_SNMP_H - -#include "lustre-snmp-util.h" - -config_require(util_funcs) -config_add_mib(LUSTRE-MIB) -config_require(lustre/cfs_util) -config_require(lustre/cfs_trap) - -/* function prototypes */ -void init_cfsNetSNMPPlugin(void); -FindVarMethod var_clusterFileSystems; -FindVarMethod var_osdTable; -FindVarMethod var_oscTable; -FindVarMethod var_mdsTable; -FindVarMethod var_mdcTable; -FindVarMethod var_cliTable; -FindVarMethod var_ldlmTable; -FindVarMethod var_lovTable; -WriteMethod write_sysStatus; - -#endif /* LUSTRE_SNMP_H */