-
Brian J. Murrell authored
i=Ricardo.M.Correia i=adilger Fix macro to conform to function-like-macros: http://gcc.gnu.org/onlinedocs/cpp/Function_002dlike-Macros.html#Function_002dlike-Macros Note that this error is in the upstream patches from Quadrics.
Brian J. Murrell authored i=Ricardo.M.Correia i=adilger Fix macro to conform to function-like-macros: http://gcc.gnu.org/onlinedocs/cpp/Function_002dlike-Macros.html#Function_002dlike-Macros Note that this error is in the upstream patches from Quadrics.
qsnet-rhel4-2.6.patch 49.92 KiB
Index: linux-269-5502/fs/open.c
===================================================================
--- linux-269-5502.orig/fs/open.c
+++ linux-269-5502/fs/open.c
@@ -1029,6 +1029,8 @@ out_error:
goto out;
}
+EXPORT_SYMBOL(sys_open);
+
#ifndef __alpha__
/*
Index: linux-269-5502/fs/read_write.c
===================================================================
--- linux-269-5502.orig/fs/read_write.c
+++ linux-269-5502/fs/read_write.c
@@ -145,6 +145,7 @@ asmlinkage off_t sys_lseek(unsigned int
bad:
return retval;
}
+EXPORT_SYMBOL(sys_lseek);
#ifdef __ARCH_WANT_SYS_LLSEEK
asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high,
Index: linux-269-5502/fs/select.c
===================================================================
--- linux-269-5502.orig/fs/select.c
+++ linux-269-5502/fs/select.c
@@ -539,3 +539,4 @@ out_fds:
poll_freewait(&table);
return err;
}
+EXPORT_SYMBOL_GPL(sys_poll);
Index: linux-269-5502/fs/exec.c
===================================================================
--- linux-269-5502.orig/fs/exec.c
+++ linux-269-5502/fs/exec.c
@@ -56,6 +56,8 @@
#include <linux/kmod.h>
#endif
+#include <linux/ptrack.h>
+
int core_uses_pid;
char core_pattern[65] = "core";
int suid_dumpable = 0;
@@ -1214,6 +1216,9 @@ int do_execve(char * filename,
if (retval < 0)
goto out;
+ /* notify any ptrack callbacks of the process exec */
+ ptrack_call_callbacks(PTRACK_PHASE_EXEC, NULL);
+
retval = search_binary_handler(bprm,regs);
if (retval >= 0) {
free_arg_pages(bprm);
Index: linux-269-5502/arch/i386/Kconfig
===================================================================
--- linux-269-5502.orig/arch/i386/Kconfig
+++ linux-269-5502/arch/i386/Kconfig
@@ -960,6 +960,9 @@ config REGPARM
generate incorrect output with certain kernel constructs when
-mregparm=3 is used.
+source "mm/Kconfig"
+source "kernel/Kconfig"
+
endmenu
Index: linux-269-5502/arch/i386/defconfig
===================================================================
--- linux-269-5502.orig/arch/i386/defconfig
+++ linux-269-5502/arch/i386/defconfig
@@ -119,6 +119,8 @@ CONFIG_MTRR=y
CONFIG_IRQBALANCE=y
CONFIG_HAVE_DEC_LOCK=y
# CONFIG_REGPARM is not set
+CONFIG_IOPROC=y
+CONFIG_PTRACK=y
#
# Power management options (ACPI, APM)
Index: linux-269-5502/arch/ia64/Kconfig
===================================================================
--- linux-269-5502.orig/arch/ia64/Kconfig
+++ linux-269-5502/arch/ia64/Kconfig
@@ -316,6 +316,9 @@ config IA64_PALINFO
To use this option, you have to ensure that the "/proc file system
support" (CONFIG_PROC_FS) is enabled, too.
+source "mm/Kconfig"
+source "kernel/Kconfig"
+
source "drivers/firmware/Kconfig"
source "fs/Kconfig.binfmt"
Index: linux-269-5502/arch/ia64/defconfig
===================================================================
--- linux-269-5502.orig/arch/ia64/defconfig
+++ linux-269-5502/arch/ia64/defconfig
@@ -83,6 +83,8 @@ CONFIG_IA32_SUPPORT=y
CONFIG_COMPAT=y
CONFIG_PERFMON=y
CONFIG_IA64_PALINFO=y
+CONFIG_IOPROC=y
+CONFIG_PTRACK=y
#
# Firmware Drivers
Index: linux-269-5502/arch/x86_64/Kconfig
===================================================================
--- linux-269-5502.orig/arch/x86_64/Kconfig
+++ linux-269-5502/arch/x86_64/Kconfig
@@ -401,6 +401,9 @@ config X86_MCE_AMD
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
+source "mm/Kconfig"
+source "kernel/Kconfig"
+
endmenu
Index: linux-269-5502/arch/x86_64/defconfig
===================================================================
--- linux-269-5502.orig/arch/x86_64/defconfig
+++ linux-269-5502/arch/x86_64/defconfig
@@ -87,6 +87,8 @@ CONFIG_NR_CPUS=8
CONFIG_GART_IOMMU=y
CONFIG_SWIOTLB=y
CONFIG_X86_MCE=y
+CONFIG_IOPROC=y
+CONFIG_PTRACK=y
#
# Power management options
Index: linux-269-5502/kernel/ptrack.c
===================================================================
--- /dev/null
+++ linux-269-5502/kernel/ptrack.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (C) 2000 Regents of the University of California
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Derived from exit_actn.c by
+ * Copyright (C) 2003 Quadrics Ltd.
+ */
+
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/ptrack.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#include <asm/errno.h>
+
+/* Queue (callback, arg) on the calling task's ptrack list; 0 or -ENOMEM. */
+int
+ptrack_register (ptrack_callback_t callback, void *arg)
+{
+	struct ptrack_desc *desc = kmalloc (sizeof (*desc), GFP_KERNEL);
+
+	if (desc == NULL)
+		return -ENOMEM;
+
+	desc->callback = callback;
+	desc->arg = arg;
+	/* The entry always lands on "current": registration is per-caller. */
+	list_add_tail (&desc->link, &current->ptrack_list);
+	return 0;
+}
+
+/* Remove and free every (callback, arg) entry on the caller's ptrack list. */
+void
+ptrack_deregister (ptrack_callback_t callback, void *arg)
+{
+	struct list_head *el, *nel;
+
+	list_for_each_safe (el, nel, &current->ptrack_list) {
+		struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link);
+		if (desc->callback == callback && desc->arg == arg) {
+			list_del (&desc->link);
+			kfree (desc);	/* safe: nel was fetched before the unlink */
+		}
+	}
+}
+
+/* Return 1 if (callback, arg) is on the calling task's ptrack list, else 0. */
+int
+ptrack_registered (ptrack_callback_t callback, void *arg)
+{
+	struct list_head *el;
+
+	list_for_each (el, &current->ptrack_list) {
+		struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link);
+		if (desc->callback == callback && desc->arg == arg)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Run every callback on the calling task's ptrack list for "phase".
+ * EXIT frees each entry after its callback; CLONE resets child's list and
+ * copies entries whose callback returns PTRACK_INNHERIT onto it.
+ * Returns 0 on success, 1 if a clone was denied or an allocation failed.
+ */
+int
+ptrack_call_callbacks (int phase, struct task_struct *child)
+{
+	struct list_head *el, *nel;
+	struct ptrack_desc *new;
+	int res;
+
+	if (phase == PTRACK_PHASE_CLONE)
+		INIT_LIST_HEAD (&child->ptrack_list);
+
+	list_for_each_safe (el, nel, &current->ptrack_list) {
+		struct ptrack_desc *desc = list_entry (el, struct ptrack_desc, link);
+
+		res = desc->callback (desc->arg, phase, child);
+		switch (phase)
+		{
+		case PTRACK_PHASE_EXIT:
+			list_del (&desc->link);
+			kfree (desc);
+			break;
+
+		case PTRACK_PHASE_CLONE:
+			switch (res)
+			{
+			case PTRACK_FINISHED:
+				break;
+
+			case PTRACK_INNHERIT:
+				if ((new = kmalloc (sizeof (struct ptrack_desc), GFP_ATOMIC)) == NULL)
+				{
+					/* allocation failed - notify that this process is not going
+					 * to be started by signalling clone failure.
+					 */
+					desc->callback (desc->arg, PTRACK_PHASE_CLONE_FAIL, child);
+					goto failed;
+				}
+				new->callback = desc->callback;
+				new->arg = desc->arg;
+				list_add_tail (&new->link, &child->ptrack_list);
+				break;
+
+			case PTRACK_DENIED:
+				goto failed;
+			}
+			break;
+		}
+	}
+	return 0;
+
+ failed:
+	/* Unwind: signal clone failure to each entry child inherited, then free it. */
+	while (! list_empty (&child->ptrack_list))
+	{
+		struct ptrack_desc *desc = list_entry (child->ptrack_list.next, struct ptrack_desc, link);
+		desc->callback (desc->arg, PTRACK_PHASE_CLONE_FAIL, child);
+		list_del (&desc->link);
+		kfree (desc);
+	}
+	return 1;
+}
+EXPORT_SYMBOL(ptrack_register);
+EXPORT_SYMBOL(ptrack_deregister);
+EXPORT_SYMBOL(ptrack_registered);
Index: linux-269-5502/kernel/signal.c
===================================================================
--- linux-269-5502.orig/kernel/signal.c
+++ linux-269-5502/kernel/signal.c
@@ -2329,6 +2329,7 @@ sys_kill(int pid, int sig)
return kill_something_info(sig, &info, pid);
}
+EXPORT_SYMBOL_GPL(sys_kill);
/**
* sys_tgkill - send signal to one specific thread
Index: linux-269-5502/kernel/Kconfig
===================================================================
--- /dev/null
+++ linux-269-5502/kernel/Kconfig
@@ -0,0 +1,14 @@
+#
+# Kernel subsystem specific config
+#
+
+# Support for Process Tracking callbacks
+#
+config PTRACK
+ bool "Enable PTRACK process tracking hooks"
+ default y
+ help
+ This option enables hooks to be called when processes are
+ created and destroyed in order for a resource management
+ system to know which processes are members of a "job" and
+ to be able to clean up when the job is terminated.
Index: linux-269-5502/kernel/Makefile
===================================================================
--- linux-269-5502.orig/kernel/Makefile
+++ linux-269-5502/kernel/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_AUDITFILESYSTEM) += auditfs.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_PTRACK) += ptrack.o
ifneq ($(CONFIG_IA64),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
Index: linux-269-5502/kernel/exit.c
===================================================================
--- linux-269-5502.orig/kernel/exit.c
+++ linux-269-5502/kernel/exit.c
@@ -32,6 +32,8 @@
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+#include <linux/ptrack.h>
+
extern void sem_exit (void);
extern struct task_struct *child_reaper;
@@ -825,6 +827,9 @@ asmlinkage NORET_TYPE void do_exit(long
current->tux_exit();
}
+ /* Notify any ptrack callbacks of the process exit */
+ ptrack_call_callbacks(PTRACK_PHASE_EXIT, NULL);
+
if (unlikely(tsk->audit_context))
audit_free(tsk);
__exit_mm(tsk);
Index: linux-269-5502/kernel/fork.c
===================================================================
--- linux-269-5502.orig/kernel/fork.c
+++ linux-269-5502/kernel/fork.c
@@ -14,6 +14,7 @@
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/init.h>
+#include <linux/ptrack.h>
#include <linux/unistd.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
@@ -443,6 +444,9 @@ static struct mm_struct * mm_init(struct
mm->page_table_lock = SPIN_LOCK_UNLOCKED;
mm->ioctx_list_lock = RW_LOCK_UNLOCKED;
mm->ioctx_list = NULL;
+#ifdef CONFIG_IOPROC
+ mm->ioproc_ops = NULL;
+#endif
mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
mm->free_area_cache = TASK_UNMAPPED_BASE;
@@ -1312,6 +1316,11 @@ long do_fork(unsigned long clone_flags,
set_tsk_thread_flag(p, TIF_SIGPENDING);
}
+ if (ptrack_call_callbacks(PTRACK_PHASE_CLONE, p)) {
+ sigaddset(&p->pending.signal, SIGKILL);
+ set_tsk_thread_flag(p, TIF_SIGPENDING);
+ }
+
if (!(clone_flags & CLONE_STOPPED))
wake_up_new_task(p, clone_flags);
else
Index: linux-269-5502/Makefile
===================================================================
--- linux-269-5502.orig/Makefile
+++ linux-269-5502/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 9
-EXTRAVERSION = -prep
+EXTRAVERSION = -prep.qp3.5.34.4qsnet
RHEL_VERSION = 4
RHEL_UPDATE = 5
NAME=AC 1
Index: linux-269-5502/Documentation/vm/ioproc.txt
===================================================================
--- /dev/null
+++ linux-269-5502/Documentation/vm/ioproc.txt
@@ -0,0 +1,467 @@
+Linux IOPROC patch overview
+===========================
+
+The network interface for an HPC network differs significantly from
+network interfaces for traditional IP networks. HPC networks tend to
+be used directly from user processes and perform large RDMA transfers
+between these processes' address spaces. They also have a requirement
+for low latency communication, and typically achieve this by OS bypass
+techniques. This then requires a different model to traditional
+interconnects, in that a process may need to expose a large amount of
+its address space to the network RDMA.
+
+Locking down of memory has been a common mechanism for performing
+this, together with a pin-down cache implemented in user
+libraries. The disadvantage of this method is that large portions of
+the physical memory can be locked down for a single process, even if
+its working set changes over the different phases of its
+execution. This leads to inefficient memory utilisation - akin to the
+disadvantage of swapping compared to paging.
+
+This model also has problems where memory is being dynamically
+allocated and freed, since the pin down cache is unaware that memory
+may have been released by a call to munmap() and so it will still be
+locking down the now unused pages.
+
+Some modern HPC network interfaces implement their own MMU and are
+able to handle a translation fault during a network access. The
+Quadrics (http://www.quadrics.com) devices (Elan3 and Elan4) have done
+this for some time and we expect others to follow the same route in
+the relatively near future. These NICs are able to operate in an
+environment where paging occurs and do not require memory to be locked
+down. The advantage of this is that the user process can expose large
+portions of its address space without having to worry about physical
+memory constraints.
+
+However should the operating system decide to swap a page to disk,
+then the NIC must be made aware that it should no longer read/write
+from this memory, but should generate a translation fault instead.
+
+The ioproc patch has been developed to provide a mechanism whereby the
+device driver for a NIC can be aware of when a user process's address
+translations change, either by paging or by explicitly mapping or
+unmapping memory.
+
+The patch involves inserting callbacks where translations are being
+invalidated to notify the NIC that the memory behind those
+translations is no longer visible to the application (and so should
+not be visible to the NIC). This callback is then responsible for
+ensuring that the NIC will not access the physical memory that was
+being mapped.
+
+An ioproc invalidate callback in the kswapd code could be utilised to
+prevent memory from being paged out if the NIC is unable to support
+network page faulting.
+
+For NICs which support network page faulting, there is no requirement
+for a user level pin down cache, since they are able to page-in their
+translations on the first communication using a buffer. However this
+is likely to be inefficient, resulting in slow first use of the
+buffer. If the communication buffers were continually allocated and
+freed using mmap based malloc() calls then this would lead to all
+communications being slower than desirable.
+
+To optimise these warm-up cases the ioproc patch adds calls to
+ioproc_update wherever the kernel is creating translations for a user
+process. These then allow the device driver to preload translations
+so that they are already present for the first network communication
+from a buffer.
+
+Linux 2.6 IOPROC implementation details
+=======================================
+
+The Linux IOPROC patch adds hooks to the Linux VM code whenever page
+table entries are being created and/or invalidated. IOPROC device
+drivers can register their interest in being informed of such changes
+by registering an ioproc_ops structure which is defined as follows;
+
+extern int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip);
+extern int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip);
+
+typedef struct ioproc_ops {
+ struct ioproc_ops *next;
+ void *arg;
+
+ void (*release)(void *arg, struct mm_struct *mm);
+ void (*sync_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end);
+ void (*invalidate_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end);
+ void (*update_range)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end);
+
+ void (*change_protection)(void *arg, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgprot_t newprot);
+
+ void (*sync_page)(void *arg, struct vm_area_struct *vma, unsigned long address);
+ void (*invalidate_page)(void *arg, struct vm_area_struct *vma, unsigned long address);
+ void (*update_page)(void *arg, struct vm_area_struct *vma, unsigned long address);
+
+} ioproc_ops_t;
+
+ioproc_register_ops
+===================
+This function should be called by the IOPROC device driver to register
+its interest in PTE changes for the process associated with the passed
+in mm_struct.
+
+The ioproc registration is not inherited across fork() and should be
+called once for each process that IOPROC is interested in.
+
+This function must be called whilst holding the mm->page_table_lock.
+
+ioproc_unregister_ops
+=====================
+This function should be called by the IOPROC device driver when it no
+longer requires informing of PTE changes in the process associated
+with the supplied mm_struct.
+
+This function is not normally needed to be called as the ioproc_ops
+struct is unlinked from the associated mm_struct during the
+ioproc_release() call.
+
+This function must be called whilst holding the mm->page_table_lock.
+
+ioproc_ops struct
+=================
+A linked list of ioproc_ops structures is hung off the user process
+mm_struct (linux/sched.h). At each hook point in the patched kernel,
+the ioproc patch will call the associated ioproc_ops callback function
+pointer in turn for each registered structure.
+
+The intention of the callbacks is to allow the IOPROC device driver to
+inspect the new or modified PTE entry via the Linux kernel
+(e.g. find_pte_map()). These callbacks should not modify the Linux
+kernel VM state or PTE entries.
+
+The ioproc_ops callback function pointers are:
+
+ioproc_release
+==============
+The release hook is called when a program exits and all its vma areas
+are torn down and unmapped, i.e. during exit_mmap(). Before each
+release hook is called the ioproc_ops structure is unlinked from the
+mm_struct.
+
+No locks are required as the process has the only reference to the mm
+at this point.
+
+ioproc_sync_[range|page]
+========================
+The sync hooks are called when a memory map is synchronised with its
+disk image i.e. when the msync() syscall is invoked. Any future read
+or write by the IOPROC device to the associated pages should cause the
+page to be marked as referenced or modified.
+
+Called holding the mm->page_table_lock.
+
+ioproc_invalidate_[range|page]
+==============================
+The invalidate hooks are called whenever a valid PTE is unloaded
+e.g. when a page is unmapped by the user or paged out by the
+kernel. After this call the IOPROC must not access the physical memory
+again unless a new translation is loaded.
+
+Called holding the mm->page_table_lock.
+
+ioproc_update_[range|page]
+==========================
+The update hooks are called whenever a valid PTE is loaded
+e.g. mmaping memory, moving the brk up, when breaking COW or faulting
+in an anonymous page of memory. These give the IOPROC device the
+opportunity to load translations speculatively, which can improve
+performance by avoiding device translation faults.
+
+Called holding the mm->page_table_lock.
+
+ioproc_change_protection
+========================
+This hook is called when the protection on a region of memory is
+changed i.e. when the mprotect() syscall is invoked.
+
+The IOPROC must not be able to write to a read-only page, so if the
+permissions are downgraded then it must honour them. If they are
+upgraded it can treat this in the same way as the
+ioproc_update_[range|page]() calls.
+
+Called holding the mm->page_table_lock.
+
+
+Linux 2.6 IOPROC patch details
+==============================
+
+Here are the specific details of each ioproc hook added to the Linux
+2.6 VM system and the reasons for doing so:
+
+++++ FILE
+ mm/fremap.c
+
+==== FUNCTION
+ zap_pte
+
+CALLED FROM
+ install_page
+ install_file_pte
+
+PTE MODIFICATION
+ ptep_clear_flush
+
+ADDED HOOKS
+ ioproc_invalidate_page
+
+==== FUNCTION
+ install_page
+
+CALLED FROM
+ filemap_populate, shmem_populate
+
+PTE MODIFICATION
+ set_pte
+
+ADDED HOOKS
+ ioproc_update_page
+
+==== FUNCTION
+ install_file_pte
+
+CALLED FROM
+ filemap_populate, shmem_populate
+
+PTE MODIFICATION
+ set_pte
+
+ADDED HOOKS
+ ioproc_update_page
+
+
+++++ FILE
+ mm/memory.c
+
+==== FUNCTION
+ zap_page_range
+
+CALLED FROM
+ read_zero_pagealigned, madvise_dontneed, unmap_mapping_range,
+ unmap_mapping_range_list, do_mmap_pgoff
+
+PTE MODIFICATION
+ set_pte (unmap_vmas)
+
+ADDED HOOKS
+ ioproc_invalidate_range
+
+
+==== FUNCTION
+ zeromap_page_range
+
+CALLED FROM
+ read_zero_pagealigned, mmap_zero
+
+PTE MODIFICATION
+ set_pte (zeromap_pte_range)
+
+ADDED HOOKS
+ ioproc_invalidate_range
+ ioproc_update_range
+
+
+==== FUNCTION
+ remap_page_range
+
+CALLED FROM
+ many device drivers
+
+PTE MODIFICATION
+ set_pte (remap_pte_range)
+
+ADDED HOOKS
+ ioproc_invalidate_range
+ ioproc_update_range
+
+
+==== FUNCTION
+ break_cow
+
+CALLED FROM
+ do_wp_page
+
+PTE MODIFICATION
+ ptep_establish
+
+ADDED HOOKS
+ ioproc_invalidate_page
+ ioproc_update_page
+
+
+==== FUNCTION
+ do_wp_page
+
+CALLED FROM
+ do_swap_page, handle_pte_fault
+
+PTE MODIFICATION
+ ptep_set_access_flags
+
+ADDED HOOKS
+ ioproc_update_page
+
+
+==== FUNCTION
+ do_swap_page
+
+CALLED FROM
+ handle_pte_fault
+
+PTE MODIFICATION
+ set_pte
+
+ADDED HOOKS
+ ioproc_update_page
+
+
+==== FUNCTION
+ do_anonymous_page
+
+CALLED FROM
+ do_no_page
+
+PTE MODIFICATION
+ set_pte
+
+ADDED HOOKS
+ ioproc_update_page
+
+
+==== FUNCTION
+ do_no_page
+
+CALLED FROM
+ do_file_page, handle_pte_fault
+
+PTE MODIFICATION
+ set_pte
+
+ADDED HOOKS
+ ioproc_update_page
+
+
+++++ FILE
+ mm/mmap.c
+
+==== FUNCTION
+ unmap_region
+
+CALLED FROM
+ do_munmap
+
+PTE MODIFICATION
+ set_pte (unmap_vmas)
+
+ADDED HOOKS
+ ioproc_invalidate_range
+
+
+==== FUNCTION
+ exit_mmap
+
+CALLED FROM
+ mmput
+
+PTE MODIFICATION
+ set_pte (unmap_vmas)
+
+ADDED HOOKS
+ ioproc_release
+
+
+++++ FILE
+ mm/mprotect.c
+
+==== FUNCTION
+ change_protection
+
+CALLED FROM
+ mprotect_fixup
+
+PTE MODIFICATION
+ set_pte (change_pte_range)
+
+ADDED HOOKS
+ ioproc_change_protection
+
+
+++++ FILE
+ mm/mremap.c
+
+==== FUNCTION
+ move_page_tables
+
+CALLED FROM
+ move_vma
+
+PTE MODIFICATION
+ ptep_clear_flush (move_one_page)
+
+ADDED HOOKS
+ ioproc_invalidate_range
+ ioproc_invalidate_range
+
+
+++++ FILE
+ mm/rmap.c
+
+==== FUNCTION
+ try_to_unmap_one
+
+CALLED FROM
+ try_to_unmap_anon, try_to_unmap_file
+
+PTE MODIFICATION
+ ptep_clear_flush
+
+ADDED HOOKS
+ ioproc_invalidate_page
+
+
+==== FUNCTION
+ try_to_unmap_cluster
+
+CALLED FROM
+ try_to_unmap_file
+
+PTE MODIFICATION
+ ptep_clear_flush
+
+ADDED HOOKS
+ ioproc_invalidate_page
+
+
+
+++++ FILE
+ mm/msync.c
+
+==== FUNCTION
+ filemap_sync
+
+CALLED FROM
+ msync_interval
+
+PTE MODIFICATION
+ ptep_clear_flush_dirty (filemap_sync_pte)
+
+ADDED HOOKS
+ ioproc_sync_range
+
+
+++++ FILE
+ mm/hugetlb.c
+
+==== FUNCTION
+ zap_hugepage_range
+
+CALLED FROM
+ hugetlb_vmtruncate_list
+
+PTE MODIFICATION
+ ptep_get_and_clear (unmap_hugepage_range)
+
+ADDED HOOK
+ ioproc_invalidate_range
+
+-- Last update Daniel J Blueman - 24 Mar 2006
Index: linux-269-5502/mm/ioproc.c
===================================================================
--- /dev/null
+++ linux-269-5502/mm/ioproc.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2006 Quadrics Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * Registration for IO processor page table updates.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/mm.h>
+#include <linux/ioproc.h>
+
+/* Link ip at the head of mm's ioproc_ops chain; always returns 0. */
+int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip)
+{
+	ip->next = mm->ioproc_ops;	/* new entry points at the old head */
+	mm->ioproc_ops = ip;		/* ...and then becomes the head itself */
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ioproc_register_ops);
+
+/* Unlink ip from mm's ioproc_ops chain: 0 if found, -EINVAL if absent. */
+int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip)
+{
+	struct ioproc_ops **link = &mm->ioproc_ops;
+
+	while (*link != NULL && *link != ip)
+		link = &(*link)->next;
+	if (*link == NULL)
+		return -EINVAL;
+	*link = ip->next;	/* splice ip out via the slot that pointed at it */
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(ioproc_unregister_ops);
Index: linux-269-5502/mm/hugetlb.c
===================================================================
--- linux-269-5502.orig/mm/hugetlb.c
+++ linux-269-5502/mm/hugetlb.c
@@ -10,6 +10,7 @@
#include <linux/hugetlb.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
+#include <linux/ioproc.h>
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
static unsigned long nr_huge_pages, free_huge_pages;
@@ -260,6 +261,7 @@ void zap_hugepage_range(struct vm_area_s
struct mm_struct *mm = vma->vm_mm;
spin_lock(&mm->page_table_lock);
+ ioproc_invalidate_range(vma, start, start + length);
unmap_hugepage_range(vma, start, start + length);
spin_unlock(&mm->page_table_lock);
}
Index: linux-269-5502/mm/Kconfig
===================================================================
--- /dev/null
+++ linux-269-5502/mm/Kconfig
@@ -0,0 +1,15 @@
+#
+# VM subsystem specific config
+#
+
+# Support for IO processors which have advanced RDMA capabilities
+#
+config IOPROC
+ bool "Enable IOPROC VM hooks"
+ depends on MMU
+ default y
+ help
+ This option enables hooks in the VM subsystem so that IO devices which
+ incorporate advanced RDMA capabilities can be kept in sync with CPU
+ page table changes.
+ See Documentation/vm/ioproc.txt for more details.
Index: linux-269-5502/mm/Makefile
===================================================================
--- linux-269-5502.orig/mm/Makefile
+++ linux-269-5502/mm/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_SWAP) += page_io.o swap_sta
obj-$(CONFIG_X86_4G) += usercopy.o
obj-$(CONFIG_HUGETLBFS) += hugetlb.o
obj-$(CONFIG_NUMA) += mempolicy.o
+obj-$(CONFIG_IOPROC) += ioproc.o
obj-$(CONFIG_SHMEM) += shmem.o
obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
Index: linux-269-5502/mm/mprotect.c
===================================================================
--- linux-269-5502.orig/mm/mprotect.c
+++ linux-269-5502/mm/mprotect.c
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <linux/ioproc.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
@@ -100,6 +101,7 @@ change_protection(struct vm_area_struct
if (start >= end)
BUG();
spin_lock(&current->mm->page_table_lock);
+ ioproc_change_protection(vma, start, end, newprot);
do {
change_pmd_range(dir, start, end - start, newprot);
start = (start + PGDIR_SIZE) & PGDIR_MASK;
Index: linux-269-5502/mm/msync.c
===================================================================
--- linux-269-5502.orig/mm/msync.c
+++ linux-269-5502/mm/msync.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
+#include <linux/ioproc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -115,6 +116,7 @@ static int filemap_sync(struct vm_area_s
if (address >= end)
BUG();
+ ioproc_sync_range(vma, address, end);
do {
error |= filemap_sync_pmd_range(dir, address, end, vma, flags);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
Index: linux-269-5502/mm/mremap.c
===================================================================
--- linux-269-5502.orig/mm/mremap.c
+++ linux-269-5502/mm/mremap.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <linux/ioproc.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
@@ -148,6 +149,8 @@ static unsigned long move_page_tables(st
{
unsigned long offset;
+ ioproc_invalidate_range(vma, old_addr, old_addr + len);
+ ioproc_invalidate_range(vma, new_addr, new_addr + len);
flush_cache_range(vma, old_addr, old_addr + len);
/*
Index: linux-269-5502/mm/fremap.c
===================================================================
--- linux-269-5502.orig/mm/fremap.c
+++ linux-269-5502/mm/fremap.c
@@ -12,6 +12,7 @@
#include <linux/mman.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
+#include <linux/ioproc.h>
#include <linux/rmap.h>
#include <linux/module.h>
@@ -29,6 +30,7 @@ static inline void zap_pte(struct mm_str
if (pte_present(pte)) {
unsigned long pfn = pte_pfn(pte);
+ ioproc_invalidate_page(vma, addr);
flush_cache_page(vma, addr);
pte = ptep_clear_flush(vma, addr, ptep);
if (pfn_valid(pfn)) {
@@ -93,6 +95,7 @@ int install_page(struct mm_struct *mm, s
pte_val = *pte;
pte_unmap(pte);
update_mmu_cache(vma, addr, pte_val);
+ ioproc_update_page(vma, addr);
err = 0;
err_unlock:
@@ -132,6 +135,7 @@ int install_file_pte(struct mm_struct *m
pte_val = *pte;
pte_unmap(pte);
update_mmu_cache(vma, addr, pte_val);
+ ioproc_update_page(vma, addr);
spin_unlock(&mm->page_table_lock);
return 0;
Index: linux-269-5502/mm/rmap.c
===================================================================
--- linux-269-5502.orig/mm/rmap.c
+++ linux-269-5502/mm/rmap.c
@@ -51,6 +51,7 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/rmap.h>
+#include <linux/ioproc.h>
#include <linux/rcupdate.h>
#include <asm/tlbflush.h>
@@ -566,6 +567,7 @@ static int try_to_unmap_one(struct page
}
/* Nuke the page table entry. */
+ ioproc_invalidate_page(vma, address);
flush_cache_page(vma, address);
pteval = ptep_clear_flush(vma, address, pte);
@@ -673,6 +675,7 @@ static void try_to_unmap_cluster(unsigne
continue;
/* Nuke the page table entry. */
+ ioproc_invalidate_page(vma, address);
flush_cache_page(vma, address);
pteval = ptep_clear_flush(vma, address, pte);
Index: linux-269-5502/mm/memory.c
===================================================================
--- linux-269-5502.orig/mm/memory.c
+++ linux-269-5502/mm/memory.c
@@ -43,6 +43,7 @@
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/ioproc.h>
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -630,6 +631,7 @@ void zap_page_range(struct vm_area_struc
lru_add_drain();
spin_lock(&mm->page_table_lock);
+ ioproc_invalidate_range(vma, address, end);
tlb = tlb_gather_mmu(mm, 0);
unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
tlb_finish_mmu(tlb, address, end);
@@ -998,6 +1000,7 @@ int zeromap_page_range(struct vm_area_st
BUG();
spin_lock(&mm->page_table_lock);
+ ioproc_invalidate_range(vma, beg, end);
do {
pmd_t *pmd = pmd_alloc(mm, dir, address);
error = -ENOMEM;
@@ -1012,6 +1015,7 @@ int zeromap_page_range(struct vm_area_st
/*
* Why flush? zeromap_pte_range has a BUG_ON for !pte_none()
*/
+ ioproc_update_range(vma, beg, end);
flush_tlb_range(vma, beg, end);
spin_unlock(&mm->page_table_lock);
return error;
@@ -1092,6 +1096,7 @@ int remap_page_range(struct vm_area_stru
vma->vm_flags |= VM_IO | VM_RESERVED;
spin_lock(&mm->page_table_lock);
+ ioproc_invalidate_range(vma, beg, end);
do {
pmd_t *pmd = pmd_alloc(mm, dir, from);
error = -ENOMEM;
@@ -1106,6 +1111,7 @@ int remap_page_range(struct vm_area_stru
/*
* Why flush? remap_pte_range has a BUG_ON for !pte_none()
*/
+ ioproc_update_range(vma, beg, end);
flush_tlb_range(vma, beg, end);
spin_unlock(&mm->page_table_lock);
return error;
@@ -1194,6 +1200,7 @@ static int do_wp_page(struct mm_struct *
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
pte_unmap(page_table);
+ ioproc_update_page(vma, address);
spin_unlock(&mm->page_table_lock);
return VM_FAULT_MINOR;
}
@@ -1226,6 +1233,7 @@ static int do_wp_page(struct mm_struct *
++mm->rss;
else
page_remove_rmap(old_page);
+ ioproc_invalidate_page(vma, address);
break_cow(vma, new_page, address, page_table);
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
@@ -1234,6 +1242,7 @@ static int do_wp_page(struct mm_struct *
new_page = old_page;
}
pte_unmap(page_table);
+ ioproc_update_page(vma, address);
page_cache_release(new_page);
page_cache_release(old_page);
spin_unlock(&mm->page_table_lock);
@@ -1630,6 +1639,7 @@ static int do_swap_page(struct mm_struct
update_mmu_cache(vma, address, pte);
lazy_mmu_prot_update(pte);
pte_unmap(page_table);
+ ioproc_update_page(vma, address);
spin_unlock(&mm->page_table_lock);
out:
return ret;
@@ -1695,6 +1705,7 @@ do_anonymous_page(struct mm_struct *mm,
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
lazy_mmu_prot_update(entry);
+ ioproc_update_page(vma, addr);
spin_unlock(&mm->page_table_lock);
out:
return VM_FAULT_MINOR;
@@ -1813,6 +1824,7 @@ retry:
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
+ ioproc_update_page(vma, address);
spin_unlock(&mm->page_table_lock);
out:
return ret;
@@ -1998,6 +2010,7 @@ int make_pages_present(unsigned long add
return ret;
return ret == len ? 0 : -1;
}
+EXPORT_SYMBOL(make_pages_present);
/*
* Map a vmalloc()-space virtual address to the physical page.
Index: linux-269-5502/mm/mmap.c
===================================================================
--- linux-269-5502.orig/mm/mmap.c
+++ linux-269-5502/mm/mmap.c
@@ -15,6 +15,7 @@
#include <linux/init.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/ioproc.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
@@ -1703,6 +1704,7 @@ static void unmap_region(struct mm_struc
unsigned long nr_accounted = 0;
lru_add_drain();
+ ioproc_invalidate_range(vma, start, end);
tlb = tlb_gather_mmu(mm, 0);
unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
@@ -1995,6 +1997,7 @@ void exit_mmap(struct mm_struct *mm)
spin_lock(&mm->page_table_lock);
+ ioproc_release(mm);
tlb = tlb_gather_mmu(mm, 1);
flush_cache_mm(mm);
/* Use ~0UL here to ensure all VMAs in the mm are unmapped */
Index: linux-269-5502/ipc/shm.c
===================================================================
--- linux-269-5502.orig/ipc/shm.c
+++ linux-269-5502/ipc/shm.c
@@ -26,6 +26,7 @@
#include <linux/proc_fs.h>
#include <linux/shmem_fs.h>
#include <linux/security.h>
+#include <linux/module.h>
#include <linux/audit.h>
#include <asm/uaccess.h>
@@ -856,6 +857,44 @@ asmlinkage long sys_shmdt(char __user *s
return retval;
}
+/*
+ * Mark all segments created by this process for destruction
+ */
+int shm_cleanup (void)
+{
+ int i;
+
+ down(&shm_ids.sem);
+
+ for (i = 0; i <= shm_ids.max_id; i++) {
+ struct shmid_kernel *shp;
+
+ shp = shm_lock(i);
+ if (shp != NULL) {
+ /* mark this segment for destruction if we created it */
+ if (current->pid == shp->shm_cprid)
+ {
+ /* copy of IPC_RMID code */
+ if (shp->shm_nattch) {
+ shp->shm_flags |= SHM_DEST;
+ /* do not find it any more */
+ shp->shm_perm.key = IPC_PRIVATE;
+ } else {
+ shm_destroy(shp);
+ continue;
+ }
+ }
+
+ shm_unlock(shp);
+ }
+ }
+
+ up(&shm_ids.sem);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(shm_cleanup);
+
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
Index: linux-269-5502/include/linux/init_task.h
===================================================================
--- linux-269-5502.orig/include/linux/init_task.h
+++ linux-269-5502/include/linux/init_task.h
@@ -2,6 +2,7 @@
#define _LINUX__INIT_TASK_H
#include <linux/file.h>
+#include <linux/ptrack.h>
#define INIT_FILES \
{ \
@@ -112,6 +113,7 @@ extern struct group_info init_groups;
.proc_lock = SPIN_LOCK_UNLOCKED, \
.switch_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \
+ INIT_TASK_PTRACK(tsk) \
}
Index: linux-269-5502/include/linux/ioproc.h
===================================================================
--- /dev/null
+++ linux-269-5502/include/linux/ioproc.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright (C) 2006 Quadrics Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/*
+ * Callbacks for IO processor page table updates.
+ */
+
+#ifndef __LINUX_IOPROC_H__
+#define __LINUX_IOPROC_H__
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+typedef struct ioproc_ops {
+ struct ioproc_ops *next;
+ void *arg;
+
+ void (*release) (void *arg, struct mm_struct * mm);
+ void (*sync_range) (void *arg, struct vm_area_struct * vma,
+ unsigned long start, unsigned long end);
+ void (*invalidate_range) (void *arg, struct vm_area_struct * vma,
+ unsigned long start, unsigned long end);
+ void (*update_range) (void *arg, struct vm_area_struct * vma,
+ unsigned long start, unsigned long end);
+
+ void (*change_protection) (void *arg, struct vm_area_struct * vma,
+ unsigned long start, unsigned long end,
+ pgprot_t newprot);
+
+ void (*sync_page) (void *arg, struct vm_area_struct * vma,
+ unsigned long address);
+ void (*invalidate_page) (void *arg, struct vm_area_struct * vma,
+ unsigned long address);
+ void (*update_page) (void *arg, struct vm_area_struct * vma,
+ unsigned long address);
+
+} ioproc_ops_t;
+
+/* IOPROC Registration
+ *
+ * Called by the IOPROC device driver to register its interest in page table
+ * changes for the process associated with the supplied mm_struct
+ *
+ * The caller should first allocate and fill out an ioproc_ops structure with
+ * the function pointers initialised to the device driver specific code for
+ * each callback. If the device driver doesn't have code for a particular
+ * callback then it should set the function pointer to be NULL.
+ * The ioproc_ops arg parameter will be passed unchanged as the first argument
+ * to each callback function invocation.
+ *
+ * The ioproc registration is not inherited across fork() and should be called
+ * once for each process that the IOPROC device driver is interested in.
+ *
+ * Must be called holding the mm->page_table_lock
+ */
+extern int ioproc_register_ops(struct mm_struct *mm, struct ioproc_ops *ip);
+
+/* IOPROC De-registration
+ *
+ * Called by the IOPROC device driver when it is no longer interested in page
+ * table changes for the process associated with the supplied mm_struct
+ *
+ * Normally this does not need to be called, as the ioproc_release() code will
+ * automatically unlink the ioproc_ops struct from the mm_struct as the
+ * process exits
+ *
+ * Must be called holding the mm->page_table_lock
+ */
+extern int ioproc_unregister_ops(struct mm_struct *mm, struct ioproc_ops *ip);
+
+#ifdef CONFIG_IOPROC
+
+/* IOPROC Release
+ *
+ * Called during exit_mmap() as all vmas are torn down and unmapped.
+ *
+ * Also unlinks the ioproc_ops structure from the mm list as it goes.
+ *
+ * No need for locks as the mm can no longer be accessed at this point
+ *
+ */
+static inline void ioproc_release(struct mm_struct *mm)
+{
+ struct ioproc_ops *cp;
+
+ while ((cp = mm->ioproc_ops) != NULL) {
+ mm->ioproc_ops = cp->next;
+
+ if (cp->release)
+ cp->release(cp->arg, mm);
+ }
+}
+
+/* IOPROC SYNC RANGE
+ *
+ * Called when a memory map is synchronised with its disk image i.e. when the
+ * msync() syscall is invoked. Any future read or write to the associated
+ * pages by the IOPROC should cause the page to be marked as referenced or
+ * modified.
+ *
+ * Called holding the mm->page_table_lock
+ */
+static inline void
+ioproc_sync_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ struct ioproc_ops *cp;
+
+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next)
+ if (cp->sync_range)
+ cp->sync_range(cp->arg, vma, start, end);
+}
+
+/* IOPROC INVALIDATE RANGE
+ *
+ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the
+ * user or paged out by the kernel.
+ *
+ * After this call the IOPROC must not access the physical memory again unless
+ * a new translation is loaded.
+ *
+ * Called holding the mm->page_table_lock
+ */
+static inline void
+ioproc_invalidate_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ struct ioproc_ops *cp;
+
+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next)
+ if (cp->invalidate_range)
+ cp->invalidate_range(cp->arg, vma, start, end);
+}
+
+/* IOPROC UPDATE RANGE
+ *
+ * Called whenever a valid PTE is loaded e.g. mmapping memory, moving the brk
+ * up, when breaking COW or faulting in an anonymous page of memory.
+ *
+ * These give the IOPROC device driver the opportunity to load translations
+ * speculatively, which can improve performance by avoiding device translation
+ * faults.
+ *
+ * Called holding the mm->page_table_lock
+ */
+static inline void
+ioproc_update_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ struct ioproc_ops *cp;
+
+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next)
+ if (cp->update_range)
+ cp->update_range(cp->arg, vma, start, end);
+}
+
+/* IOPROC CHANGE PROTECTION
+ *
+ * Called when the protection on a region of memory is changed i.e. when the
+ * mprotect() syscall is invoked.
+ *
+ * The IOPROC must not be able to write to a read-only page, so if the
+ * permissions are downgraded then it must honour them. If they are upgraded
+ * it can treat this in the same way as the ioproc_update_[range|page]() calls
+ *
+ * Called holding the mm->page_table_lock
+ */
+static inline void
+ioproc_change_protection(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, pgprot_t newprot)
+{
+ struct ioproc_ops *cp;
+
+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next)
+ if (cp->change_protection)
+ cp->change_protection(cp->arg, vma, start, end,
+ newprot);
+}
+
+/* IOPROC SYNC PAGE
+ *
+ * Called when a memory map is synchronised with its disk image i.e. when the
+ * msync() syscall is invoked. Any future read or write to the associated page
+ * by the IOPROC should cause the page to be marked as referenced or modified.
+ *
+ * Not currently called as msync() calls ioproc_sync_range() instead
+ *
+ * Called holding the mm->page_table_lock
+ */
+static inline void
+ioproc_sync_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ struct ioproc_ops *cp;
+
+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next)
+ if (cp->sync_page)
+ cp->sync_page(cp->arg, vma, addr);
+}
+
+/* IOPROC INVALIDATE PAGE
+ *
+ * Called whenever a valid PTE is unloaded e.g. when a page is unmapped by the
+ * user or paged out by the kernel.
+ *
+ * After this call the IOPROC must not access the physical memory again unless
+ * a new translation is loaded.
+ *
+ * Called holding the mm->page_table_lock
+ */
+static inline void
+ioproc_invalidate_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ struct ioproc_ops *cp;
+
+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next)
+ if (cp->invalidate_page)
+ cp->invalidate_page(cp->arg, vma, addr);
+}
+
+/* IOPROC UPDATE PAGE
+ *
+ * Called whenever a valid PTE is loaded e.g. mmapping memory, moving the brk
+ * up, when breaking COW or faulting in an anonymous page of memory.
+ *
+ * These give the IOPROC device the opportunity to load translations
+ * speculatively, which can improve performance by avoiding device translation
+ * faults.
+ *
+ * Called holding the mm->page_table_lock
+ */
+static inline void
+ioproc_update_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ struct ioproc_ops *cp;
+
+ for (cp = vma->vm_mm->ioproc_ops; cp; cp = cp->next)
+ if (cp->update_page)
+ cp->update_page(cp->arg, vma, addr);
+}
+
+#else
+
+/* ! CONFIG_IOPROC so make all hooks empty */
+
+#define ioproc_release(mm) do { } while (0)
+#define ioproc_sync_range(vma, start, end) do { } while (0)
+#define ioproc_invalidate_range(vma, start, end) do { } while (0)
+#define ioproc_update_range(vma, start, end) do { } while (0)
+#define ioproc_change_protection(vma, start, end, prot) do { } while (0)
+#define ioproc_sync_page(vma, addr) do { } while (0)
+#define ioproc_invalidate_page(vma, addr) do { } while (0)
+#define ioproc_update_page(vma, addr) do { } while (0)
+
+#endif /* CONFIG_IOPROC */
+#endif /* __LINUX_IOPROC_H__ */
Index: linux-269-5502/include/linux/sched.h
===================================================================
--- linux-269-5502.orig/include/linux/sched.h
+++ linux-269-5502/include/linux/sched.h
@@ -185,6 +185,9 @@ extern signed long schedule_timeout_unin
asmlinkage void schedule(void);
struct namespace;
+#ifdef CONFIG_IOPROC
+struct ioproc_ops;
+#endif
/* Maximum number of active map areas.. This is a random (large) number */
#define DEFAULT_MAX_MAP_COUNT 65536
@@ -260,6 +263,11 @@ struct mm_struct {
struct kioctx *ioctx_list;
struct kioctx default_kioctx;
+
+#ifdef CONFIG_IOPROC
+ /* hooks for io devices with advanced RDMA capabilities */
+ struct ioproc_ops *ioproc_ops;
+#endif
};
extern int mmlist_nr;
@@ -635,6 +643,10 @@ struct task_struct {
struct mempolicy *mempolicy;
short il_next; /* could be shared with used_math */
#endif
+#ifdef CONFIG_PTRACK
+/* process tracking callback */
+ struct list_head ptrack_list;
+#endif
};
static inline pid_t process_group(struct task_struct *tsk)
Index: linux-269-5502/include/linux/ptrack.h
===================================================================
--- /dev/null
+++ linux-269-5502/include/linux/ptrack.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2000 Regents of the University of California
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Derived from exit_actn.c by
+ * Copyright (C) 2003 Quadrics Ltd.
+ *
+ */
+#ifndef __LINUX_PTRACK_H
+#define __LINUX_PTRACK_H
+
+/*
+ * Process tracking - this allows a module to keep track of processes
+ * in order that it can manage all tasks derived from a single process.
+ */
+
+#define PTRACK_PHASE_CLONE 1
+#define PTRACK_PHASE_CLONE_FAIL 2
+#define PTRACK_PHASE_EXEC 3
+#define PTRACK_PHASE_EXIT 4
+
+#define PTRACK_FINISHED 0
+#define PTRACK_INNHERIT 1
+#define PTRACK_DENIED 2
+
+#ifdef CONFIG_PTRACK
+
+typedef int (*ptrack_callback_t)(void *arg, int phase, struct task_struct *child);
+
+struct ptrack_desc {
+ struct list_head link;
+ ptrack_callback_t callback;
+ void *arg;
+};
+
+extern int ptrack_register (ptrack_callback_t callback, void *arg);
+extern void ptrack_deregister (ptrack_callback_t callback, void *arg);
+extern int ptrack_registered (ptrack_callback_t callback, void *arg);
+
+extern int ptrack_call_callbacks (int phase, struct task_struct *child);
+
+#define INIT_TASK_PTRACK(tsk) \
+ .ptrack_list = LIST_HEAD_INIT(tsk.ptrack_list)
+
+#else
+#define ptrack_call_callbacks(phase, child) (0)
+
+#define INIT_TASK_PTRACK(tsk)
+
+#endif
+
+#endif /* __LINUX_PTRACK_H */
Index: linux-269-5502/include/asm-ia64/param.h
===================================================================
--- linux-269-5502.orig/include/asm-ia64/param.h
+++ linux-269-5502/include/asm-ia64/param.h
@@ -27,7 +27,7 @@
*/
# define HZ 32
# else
-# define HZ 1024
+# define HZ 100
# endif
# define USER_HZ HZ
# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */
Index: linux-269-5502/include/asm-i386/param.h
===================================================================
--- linux-269-5502.orig/include/asm-i386/param.h
+++ linux-269-5502/include/asm-i386/param.h
@@ -2,7 +2,7 @@
#define _ASMi386_PARAM_H
#ifdef __KERNEL__
-# define HZ 1000 /* Internal kernel timer frequency */
+# define HZ 100 /* Internal kernel timer frequency */
# define USER_HZ 100 /* .. some user interfaces are in "ticks" */
# define CLOCKS_PER_SEC (USER_HZ) /* like times() */
#endif
Index: linux-269-5502/include/asm-x86_64/param.h
===================================================================
--- linux-269-5502.orig/include/asm-x86_64/param.h
+++ linux-269-5502/include/asm-x86_64/param.h
@@ -2,7 +2,7 @@
#define _ASMx86_64_PARAM_H
#ifdef __KERNEL__
-# define HZ 1000 /* Internal kernel timer frequency */
+# define HZ 100 /* Internal kernel timer frequency */
# define USER_HZ 100 /* .. some user interfaces are in "ticks */
#define CLOCKS_PER_SEC (USER_HZ) /* like times() */
#endif